/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

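/* Illustrative sketch (not part of the original file, hence compiled out): the kernel facility the
 * helper above relies on is that a pidfd, as returned by pidfd_open(2), becomes readable (EPOLLIN)
 * once the process exits. A minimal standalone demonstration with poll(2) might look like this;
 * error handling is abbreviated and a kernel with pidfd_open support is assumed. */
#if 0
#include <errno.h>
#include <poll.h>
#include <sys/syscall.h>
#include <unistd.h>

static int wait_for_exit_via_pidfd(pid_t pid) {
        int pidfd = syscall(SYS_pidfd_open, pid, 0);
        if (pidfd < 0)
                return -errno;

        struct pollfd pfd = { .fd = pidfd, .events = POLLIN };
        (void) poll(&pfd, 1, -1);   /* POLLIN fires when the process exits */

        close(pidfd);
        return 0;
}
#endif
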
static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;
        size_t event_queue_allocated;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

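/* Illustrative sketch (not part of the original file, hence compiled out): the point of keeping two
 * priority queues per clock is timer coalescing. Each timer may fire anywhere in the window
 * [next, next + accuracy]. The head of the "earliest" queue bounds the next wakeup from below, the
 * head of the "latest" queue from above, and a single wakeup inside [lo, hi] satisfies every timer
 * whose window contains it. The arming code later in the file (not shown in this excerpt)
 * additionally aligns the chosen point using e->perturb. */
#if 0
static usec_t pick_wakeup(usec_t lo, usec_t hi) {
        assert(lo <= hi);
        return lo + (hi - lo) / 2; /* any value in [lo, hi] honours all accuracy contracts */
}
#endif
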
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

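/* Illustrative sketch (not part of the original file, hence compiled out): minimal consumer-side
 * usage of the constructor above, paired with sd_event_unref(). sd_event_loop() and
 * sd_event_add_defer() are public sd-event API; the callback name is made up. */
#if 0
#include <systemd/sd-event.h>

static int quit_cb(sd_event_source *s, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int run_once(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        (void) sd_event_add_defer(e, NULL, quit_cb, NULL); /* fires on the first iteration */
        r = sd_event_loop(e);                              /* dispatches until sd_event_exit() */

        sd_event_unref(e);
        return r;
}
#endif
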
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };
        int r;

        r = epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd,
                      &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        int r;

        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (s->child.registered)
                        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
                else
                        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
                if (r < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

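/* Illustrative sketch (not part of the original file, hence compiled out): the kernel primitive the
 * signal_data machinery wraps is signalfd(2). One fd carries all signals of one priority; reading
 * it yields struct signalfd_siginfo records, and the signals must be blocked first so they are
 * delivered via the fd rather than asynchronously. Minimal standalone usage: */
#if 0
#include <errno.h>
#include <signal.h>
#include <sys/signalfd.h>
#include <unistd.h>

static int read_one_signal(void) {
        sigset_t ss;
        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);

        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)   /* must block before signalfd() */
                return -errno;

        int fd = signalfd(-1, &ss, SFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        struct signalfd_siginfo si;
        ssize_t n = read(fd, &si, sizeof(si));       /* blocks until SIGTERM arrives */
        close(fd);
        return n == (ssize_t) sizeof(si) ? (int) si.ssi_signo : -EIO;
}
#endif
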
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

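/* Illustrative sketch (not part of the original file, hence compiled out): consumer-side use of
 * sd_event_add_io() above, watching an fd for readability. The fd and callback names are made up;
 * returning 0 from the handler keeps the source enabled. */
#if 0
#include <systemd/sd-event.h>
#include <unistd.h>

static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        char buf[4096];
        ssize_t n = read(fd, buf, sizeof(buf));
        if (n <= 0)
                return sd_event_source_set_enabled(s, SD_EVENT_OFF); /* EOF or error: stop watching */
        return 0;
}

static int watch_fd(sd_event *e, int fd) {
        /* Passing NULL for ret creates a "floating" source owned by the event loop. */
        return sd_event_add_io(e, NULL, fd, EPOLLIN, on_readable, NULL);
}
#endif
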
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to the same time within each
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
         * wakeup. However, let's take some system-specific randomness for this value, so that in a
         * network of systems with synced clocks timer events are distributed a bit. Here, we calculate a
         * perturbation usec offset from the boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

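/* Illustrative sketch (not part of the original file, hence compiled out): one way such a
 * boot-ID-derived perturbation can realign a wakeup, shown here in simplified form. Rounding each
 * deadline down to the nearest "granularity boundary plus perturb" makes timers on one machine snap
 * to the same instants, while machines with different boot IDs snap to different ones. */
#if 0
static usec_t align_wakeup(usec_t deadline, usec_t granularity, usec_t perturb) {
        usec_t p = perturb % granularity;
        usec_t aligned = (deadline / granularity) * granularity + p;
        if (aligned > deadline && aligned >= granularity)
                aligned -= granularity;        /* never fire later than the deadline */
        return aligned;
}
#endif
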
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;
        int r;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

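/* Illustrative sketch (not part of the original file, hence compiled out): arming a one-shot timer
 * 5 seconds from now via sd_event_add_time_relative() above, with 100ms of coalescing slack. The
 * callback name is made up, and the sd_event_source_set_time_relative() helper (added to the public
 * API alongside the _relative adder) is assumed to be available; re-arming from inside the callback
 * is the usual way to get a repeating timer. */
#if 0
#include <systemd/sd-event.h>

static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
        /* Re-arm for another 5s, turning the one-shot source into a periodic one. */
        (void) sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}

static int arm_timer(sd_event *e) {
        return sd_event_add_time_relative(e, NULL, CLOCK_MONOTONIC,
                                          5 * USEC_PER_SEC,      /* when, relative to now */
                                          100 * USEC_PER_MSEC,   /* accuracy: may fire up to 100ms late */
                                          on_timer, NULL);
}
#endif
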
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

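/* Illustrative sketch (not part of the original file, hence compiled out): consumer-side use of
 * sd_event_add_signal() above. Note the sigprocmask() call: the function returns -EBUSY if the
 * signal is not blocked, matching the signal_is_blocked() check above. With a NULL callback the
 * default handler exits the loop with the integer passed as userdata. */
#if 0
#include <errno.h>
#include <signal.h>
#include <systemd/sd-event.h>

static int handle_sigterm(sd_event *e) {
        sigset_t ss;
        sigemptyset(&ss);
        sigaddset(&ss, SIGTERM);

        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0) /* required before sd_event_add_signal() */
                return -errno;

        /* NULL callback: SIGTERM makes the loop exit with the code passed as userdata (0 here). */
        return sd_event_add_signal(e, NULL, SIGTERM, NULL, INT_TO_PTR(0));
}
#endif
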
static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->n_enabled_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */

                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }
        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */

                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }

                e->need_process_child = true;
        }

        if (ret)
                *ret = s;

        TAKE_PTR(s);
        return 0;
}

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->n_enabled_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */

                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }
        } else {
                /* We shall wait for some other event than WEXITED */

                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }

                e->need_process_child = true;
        }

        if (ret)
                *ret = s;

        TAKE_PTR(s);
        return 0;
}

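/* Illustrative sketch (not part of the original file, hence compiled out): watching a forked child
 * with sd_event_add_child() above. SIGCHLD must be blocked before the call, or it fails with
 * -EBUSY; the handler receives the siginfo_t of the already-waited child. Callback and helper
 * names are made up. */
#if 0
#include <errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <systemd/sd-event.h>
#include <unistd.h>

static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), si->si_status);
}

static int spawn_and_watch(sd_event *e) {
        sigset_t ss;
        sigemptyset(&ss);
        sigaddset(&ss, SIGCHLD);
        if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0) /* must precede sd_event_add_child() */
                return -errno;

        pid_t pid = fork();
        if (pid < 0)
                return -errno;
        if (pid == 0)
                _exit(0); /* child */

        return sd_event_add_child(e, NULL, pid, WEXITED, on_child_exit, NULL);
}
#endif
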
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

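/* Illustrative sketch (not part of the original file, hence compiled out): the difference between
 * the two source types above. A defer source is dispatched on the next loop iteration (one-shot by
 * default, as set above), while a post source runs after any other source was dispatched in the
 * same iteration, which suits "flush"-style work. Callback names are made up. */
#if 0
#include <systemd/sd-event.h>

static int flush_cb(sd_event_source *s, void *userdata) {
        /* e.g. write out state dirtied by the sources that ran earlier this iteration */
        return 0;
}

static int install_flush_hook(sd_event *e) {
        return sd_event_add_post(e, NULL, flush_cb, NULL);
}
#endif
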
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

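/* Illustrative sketch (not part of the original file, hence compiled out): a worked example of the
 * mask combination above. Two sources on the same inode, one asking for IN_CREATE|IN_EXCL_UNLINK
 * and one for IN_DELETE, must share one watch descriptor: the event bits are ORed, but
 * IN_EXCL_UNLINK only survives if every source requested it, since it narrows what the kernel
 * reports. */
#if 0
#include <assert.h>
#include <sys/inotify.h>

static void mask_example(void) {
        uint32_t a = IN_CREATE | IN_EXCL_UNLINK;
        uint32_t b = IN_DELETE;

        uint32_t combined = (a | b) & ~IN_EXCL_UNLINK;            /* OR the event bits */
        int excl = (a & IN_EXCL_UNLINK) && (b & IN_EXCL_UNLINK);  /* AND the narrowing flag */

        assert(combined == (IN_CREATE | IN_DELETE));
        assert(!excl); /* b did not request it, so the shared watch cannot use it */
}
#endif
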
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return -errno;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

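/* Illustrative sketch (not part of the original file, hence compiled out): consumer-side use of
 * sd_event_add_inotify() above, watching a directory for new entries. The path and callback names
 * are made up; the handler receives the raw struct inotify_event. */
#if 0
#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_created(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->len > 0)
                log_debug("Saw new directory entry: %s", ev->name);
        return 0;
}

static int watch_dir(sd_event *e) {
        return sd_event_add_inotify(e, NULL, "/tmp/watched", IN_CREATE | IN_ONLYDIR, on_created, NULL);
}
#endif
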
8301aa0b 1911static sd_event_source* event_source_free(sd_event_source *s) {
6680dd6b
LP
1912 if (!s)
1913 return NULL;
da7e457c 1914
8301aa0b
YW
1915 /* Here's a special hack: when we are called from a
1916 * dispatch handler we won't free the event source
1917 * immediately, but we will detach the fd from the
1918 * epoll. This way it is safe for the caller to unref
1919 * the event source and immediately close the fd, but
1920 * we still retain a valid event source object after
1921 * the callback. */
fd38203a 1922
8301aa0b
YW
1923 if (s->dispatching) {
1924 if (s->type == SOURCE_IO)
1925 source_io_unregister(s);
fd38203a 1926
8301aa0b
YW
1927 source_disconnect(s);
1928 } else
1929 source_free(s);
fd38203a
LP
1930
1931 return NULL;
1932}
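/* Illustrative consequence of the hack above (handler name hypothetical): a callback may drop its
 * own last reference and close the fd right away; the source object stays valid until the dispatch
 * call returns, because only the fd is detached from the epoll immediately:
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             sd_event_source_unref(s);
 *             close(fd);
 *             return 0;
 *     }
 */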
1933
8301aa0b
YW
1934DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1935
356779df 1936_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
f7f53e9e 1937 assert_return(s, -EINVAL);
f4b2933e 1938 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 1939
356779df 1940 return free_and_strdup(&s->description, description);
f7f53e9e
TG
1941}
1942
356779df 1943_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
f7f53e9e 1944 assert_return(s, -EINVAL);
356779df 1945 assert_return(description, -EINVAL);
f4b2933e 1946 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 1947
7d92a1a4
ZJS
1948 if (!s->description)
1949 return -ENXIO;
1950
356779df 1951 *description = s->description;
f7f53e9e
TG
1952 return 0;
1953}
1954
adcc4ca3 1955_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
305f78bf 1956 assert_return(s, NULL);
eaa3cbef
LP
1957
1958 return s->event;
1959}
1960
f7262a9f 1961_public_ int sd_event_source_get_pending(sd_event_source *s) {
305f78bf 1962 assert_return(s, -EINVAL);
6203e07a 1963 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c 1964 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 1965 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
1966
1967 return s->pending;
1968}
1969
f7262a9f 1970_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
305f78bf
LP
1971 assert_return(s, -EINVAL);
1972 assert_return(s->type == SOURCE_IO, -EDOM);
1973 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
1974
1975 return s->io.fd;
1976}
1977
30caf8f3
LP
1978_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1979 int r;
1980
1981 assert_return(s, -EINVAL);
8ac43fee 1982 assert_return(fd >= 0, -EBADF);
30caf8f3
LP
1983 assert_return(s->type == SOURCE_IO, -EDOM);
1984 assert_return(!event_pid_changed(s->event), -ECHILD);
1985
1986 if (s->io.fd == fd)
1987 return 0;
1988
1989 if (s->enabled == SD_EVENT_OFF) {
1990 s->io.fd = fd;
1991 s->io.registered = false;
1992 } else {
1993 int saved_fd;
1994
1995 saved_fd = s->io.fd;
1996 assert(s->io.registered);
1997
1998 s->io.fd = fd;
1999 s->io.registered = false;
2000
2001 r = source_io_register(s, s->enabled, s->io.events);
2002 if (r < 0) {
2003 s->io.fd = saved_fd;
2004 s->io.registered = true;
2005 return r;
2006 }
2007
5a795bff 2008 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
30caf8f3
LP
2009 }
2010
2011 return 0;
2012}
2013
ab93297c
NM
2014_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2015 assert_return(s, -EINVAL);
2016 assert_return(s->type == SOURCE_IO, -EDOM);
2017
2018 return s->io.owned;
2019}
2020
2021_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2022 assert_return(s, -EINVAL);
2023 assert_return(s->type == SOURCE_IO, -EDOM);
2024
2025 s->io.owned = own;
2026 return 0;
2027}
2028
f7262a9f 2029_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
305f78bf
LP
2030 assert_return(s, -EINVAL);
2031 assert_return(events, -EINVAL);
2032 assert_return(s->type == SOURCE_IO, -EDOM);
2033 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2034
2035 *events = s->io.events;
2036 return 0;
2037}
2038
f7262a9f 2039_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
fd38203a
LP
2040 int r;
2041
305f78bf
LP
2042 assert_return(s, -EINVAL);
2043 assert_return(s->type == SOURCE_IO, -EDOM);
2a16a986 2044 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 2045 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2046 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2047
b63c8d4f
DH
2048 /* edge-triggered updates are never skipped, so we can reset edges */
2049 if (s->io.events == events && !(events & EPOLLET))
fd38203a
LP
2050 return 0;
2051
2a0dc6cd
LP
2052 r = source_set_pending(s, false);
2053 if (r < 0)
2054 return r;
2055
baf76283 2056 if (s->enabled != SD_EVENT_OFF) {
e4715127 2057 r = source_io_register(s, s->enabled, events);
fd38203a
LP
2058 if (r < 0)
2059 return r;
2060 }
2061
2062 s->io.events = events;
2063
2064 return 0;
2065}
2066
f7262a9f 2067_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
305f78bf
LP
2068 assert_return(s, -EINVAL);
2069 assert_return(revents, -EINVAL);
2070 assert_return(s->type == SOURCE_IO, -EDOM);
2071 assert_return(s->pending, -ENODATA);
2072 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2073
2074 *revents = s->io.revents;
2075 return 0;
2076}
2077
f7262a9f 2078_public_ int sd_event_source_get_signal(sd_event_source *s) {
305f78bf
LP
2079 assert_return(s, -EINVAL);
2080 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2081 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2082
2083 return s->signal.sig;
2084}
2085
31927c16 2086_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
305f78bf
LP
2087 assert_return(s, -EINVAL);
2088 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2089
6680b8d1
ME
2090 *priority = s->priority;
2091 return 0;
fd38203a
LP
2092}
2093
31927c16 2094_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
97ef5391
LP
2095 bool rm_inotify = false, rm_inode = false;
2096 struct inotify_data *new_inotify_data = NULL;
2097 struct inode_data *new_inode_data = NULL;
9da4cb2b
LP
2098 int r;
2099
305f78bf 2100 assert_return(s, -EINVAL);
da7e457c 2101 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2102 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2103
2104 if (s->priority == priority)
2105 return 0;
2106
97ef5391
LP
2107 if (s->type == SOURCE_INOTIFY) {
2108 struct inode_data *old_inode_data;
2109
2110 assert(s->inotify.inode_data);
2111 old_inode_data = s->inotify.inode_data;
2112
2113 /* We need the original fd to change the priority. If we don't have it anymore we can't change the
2114 * priority. Note that we close these fds when entering the next event loop iteration, i.e. for inotify
2115 * sources we allow priority changes only until the first following iteration. */
2116 if (old_inode_data->fd < 0)
2117 return -EOPNOTSUPP;
2118
2119 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2120 if (r < 0)
2121 return r;
2122 rm_inotify = r > 0;
2123
2124 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2125 if (r < 0)
2126 goto fail;
2127 rm_inode = r > 0;
2128
2129 if (new_inode_data->fd < 0) {
2130 /* Duplicate the fd for the new inode object if we don't have any yet */
2131 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2132 if (new_inode_data->fd < 0) {
2133 r = -errno;
2134 goto fail;
2135 }
2136
2137 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2138 }
2139
2140 /* Move the event source to the new inode data structure */
2141 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2142 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2143 s->inotify.inode_data = new_inode_data;
2144
2145 /* Now create the new watch */
2146 r = inode_data_realize_watch(s->event, new_inode_data);
2147 if (r < 0) {
2148 /* Move it back */
2149 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2150 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2151 s->inotify.inode_data = old_inode_data;
2152 goto fail;
2153 }
2154
2155 s->priority = priority;
2156
2157 event_gc_inode_data(s->event, old_inode_data);
2158
2159 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
9da4cb2b
LP
2160 struct signal_data *old, *d;
2161
2162 /* Move us from the signalfd belonging to the old
2163 * priority to the signalfd of the new priority */
2164
2165 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2166
2167 s->priority = priority;
2168
2169 r = event_make_signal_data(s->event, s->signal.sig, &d);
2170 if (r < 0) {
2171 s->priority = old->priority;
2172 return r;
2173 }
2174
2175 event_unmask_signal_data(s->event, old, s->signal.sig);
2176 } else
2177 s->priority = priority;
fd38203a
LP
2178
2179 if (s->pending)
c2ba3ad6 2180 prioq_reshuffle(s->event->pending, s, &s->pending_index);
fd38203a
LP
2181
2182 if (s->prepare)
c2ba3ad6 2183 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
fd38203a 2184
6203e07a
LP
2185 if (s->type == SOURCE_EXIT)
2186 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 2187
fd38203a 2188 return 0;
97ef5391
LP
2189
2190fail:
2191 if (rm_inode)
2192 event_free_inode_data(s->event, new_inode_data);
2193
2194 if (rm_inotify)
2195 event_free_inotify_data(s->event, new_inotify_data);
2196
2197 return r;
fd38203a
LP
2198}
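/* Practical note on the inotify branch above (sketch, names hypothetical): because the O_PATH fd is
 * closed when the next loop iteration starts, a deviating priority must be set right after the
 * source is created:
 *
 *     r = sd_event_add_inotify(e, &src, "/run/example", IN_CREATE, on_inotify, NULL);
 *     if (r >= 0)
 *             r = sd_event_source_set_priority(src, SD_EVENT_PRIORITY_IDLE); // later attempts may return -EOPNOTSUPP
 */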
2199
f7262a9f 2200_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
305f78bf 2201 assert_return(s, -EINVAL);
305f78bf 2202 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2203
08c1eb0e
ZJS
2204 if (m)
2205 *m = s->enabled;
2206 return s->enabled != SD_EVENT_OFF;
fd38203a
LP
2207}
2208
f7262a9f 2209_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
fd38203a
LP
2210 int r;
2211
305f78bf 2212 assert_return(s, -EINVAL);
945c2931 2213 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
305f78bf 2214 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2215
cc567911
LP
2216 /* If we are dead anyway, we are fine with turning off
2217 * sources, but everything else needs to fail. */
2218 if (s->event->state == SD_EVENT_FINISHED)
2219 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2220
baf76283 2221 if (s->enabled == m)
fd38203a
LP
2222 return 0;
2223
baf76283 2224 if (m == SD_EVENT_OFF) {
fd38203a 2225
ac989a78
LP
2226 /* Unset the pending flag when this event source is disabled */
2227 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2228 r = source_set_pending(s, false);
2229 if (r < 0)
2230 return r;
2231 }
2232
fd38203a
LP
2233 switch (s->type) {
2234
2235 case SOURCE_IO:
366e6411 2236 source_io_unregister(s);
baf76283 2237 s->enabled = m;
fd38203a
LP
2238 break;
2239
6a0f1f6d 2240 case SOURCE_TIME_REALTIME:
a8548816 2241 case SOURCE_TIME_BOOTTIME:
6a0f1f6d
LP
2242 case SOURCE_TIME_MONOTONIC:
2243 case SOURCE_TIME_REALTIME_ALARM:
2244 case SOURCE_TIME_BOOTTIME_ALARM: {
2245 struct clock_data *d;
fd38203a 2246
baf76283 2247 s->enabled = m;
6a0f1f6d
LP
2248 d = event_get_clock_data(s->event, s->type);
2249 assert(d);
2250
2251 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2252 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2253 d->needs_rearm = true;
fd38203a 2254 break;
6a0f1f6d 2255 }
fd38203a
LP
2256
2257 case SOURCE_SIGNAL:
baf76283 2258 s->enabled = m;
4807d2d0 2259
9da4cb2b 2260 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
fd38203a
LP
2261 break;
2262
2263 case SOURCE_CHILD:
baf76283 2264 s->enabled = m;
fd38203a 2265
baf76283
LP
2266 assert(s->event->n_enabled_child_sources > 0);
2267 s->event->n_enabled_child_sources--;
fd38203a 2268
f8f3f926
LP
2269 if (EVENT_SOURCE_WATCH_PIDFD(s))
2270 source_child_pidfd_unregister(s);
2271 else
2272 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2273
fd38203a
LP
2274 break;
2275
6203e07a 2276 case SOURCE_EXIT:
305f78bf 2277 s->enabled = m;
6203e07a 2278 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf
LP
2279 break;
2280
2281 case SOURCE_DEFER:
6e9feda3 2282 case SOURCE_POST:
97ef5391 2283 case SOURCE_INOTIFY:
baf76283 2284 s->enabled = m;
fd38203a 2285 break;
9d3e3aa5 2286
6a0f1f6d 2287 default:
9d3e3aa5 2288 assert_not_reached("Wut? I shouldn't exist.");
fd38203a
LP
2289 }
2290
2291 } else {
ac989a78
LP
2292
2293 /* Unset the pending flag when this event source is enabled */
2294 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2295 r = source_set_pending(s, false);
2296 if (r < 0)
2297 return r;
2298 }
2299
fd38203a
LP
2300 switch (s->type) {
2301
2302 case SOURCE_IO:
2303 r = source_io_register(s, m, s->io.events);
2304 if (r < 0)
2305 return r;
2306
baf76283 2307 s->enabled = m;
fd38203a
LP
2308 break;
2309
6a0f1f6d 2310 case SOURCE_TIME_REALTIME:
a8548816 2311 case SOURCE_TIME_BOOTTIME:
6a0f1f6d
LP
2312 case SOURCE_TIME_MONOTONIC:
2313 case SOURCE_TIME_REALTIME_ALARM:
2314 case SOURCE_TIME_BOOTTIME_ALARM: {
2315 struct clock_data *d;
fd38203a 2316
baf76283 2317 s->enabled = m;
6a0f1f6d
LP
2318 d = event_get_clock_data(s->event, s->type);
2319 assert(d);
2320
2321 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2322 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2323 d->needs_rearm = true;
fd38203a 2324 break;
6a0f1f6d 2325 }
fd38203a
LP
2326
2327 case SOURCE_SIGNAL:
4807d2d0
ZJS
2328
2329 s->enabled = m;
9da4cb2b
LP
2330
2331 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2332 if (r < 0) {
2333 s->enabled = SD_EVENT_OFF;
2334 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2335 return r;
2336 }
2337
fd38203a
LP
2338 break;
2339
2340 case SOURCE_CHILD:
4807d2d0 2341
9da4cb2b 2342 if (s->enabled == SD_EVENT_OFF)
4807d2d0 2343 s->event->n_enabled_child_sources++;
7a0d4a3d
DH
2344
2345 s->enabled = m;
9da4cb2b 2346
f8f3f926
LP
2347 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2348 /* yes, we have pidfd */
2349
2350 r = source_child_pidfd_register(s, s->enabled);
2351 if (r < 0) {
2352 s->enabled = SD_EVENT_OFF;
2353 s->event->n_enabled_child_sources--;
2354 return r;
2355 }
2356 } else {
2357 /* no pidfd, or something other than WEXITED to watch for */
2358
2359 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2360 if (r < 0) {
2361 s->enabled = SD_EVENT_OFF;
2362 s->event->n_enabled_child_sources--;
2363 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2364 return r;
2365 }
9da4cb2b
LP
2366 }
2367
fd38203a
LP
2368 break;
2369
6203e07a 2370 case SOURCE_EXIT:
305f78bf 2371 s->enabled = m;
6203e07a 2372 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf
LP
2373 break;
2374
2375 case SOURCE_DEFER:
6e9feda3 2376 case SOURCE_POST:
97ef5391 2377 case SOURCE_INOTIFY:
baf76283 2378 s->enabled = m;
fd38203a 2379 break;
9d3e3aa5 2380
6a0f1f6d 2381 default:
9d3e3aa5 2382 assert_not_reached("Wut? I shouldn't exist.");
fd38203a
LP
2383 }
2384 }
2385
2386 if (s->pending)
2387 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2388
2389 if (s->prepare)
2390 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2391
2392 return 0;
2393}
2394
f7262a9f 2395_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2396 assert_return(s, -EINVAL);
2397 assert_return(usec, -EINVAL);
6a0f1f6d 2398 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf 2399 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2400
2401 *usec = s->time.next;
2402 return 0;
2403}
2404
f7262a9f 2405_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
6a0f1f6d 2406 struct clock_data *d;
2a0dc6cd 2407 int r;
6a0f1f6d 2408
305f78bf 2409 assert_return(s, -EINVAL);
6a0f1f6d 2410 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2411 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2412 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2413
2a0dc6cd
LP
2414 r = source_set_pending(s, false);
2415 if (r < 0)
2416 return r;
2576a19e 2417
2a0dc6cd 2418 s->time.next = usec;
fd38203a 2419
6a0f1f6d
LP
2420 d = event_get_clock_data(s->event, s->type);
2421 assert(d);
2422
2423 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2424 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2425 d->needs_rearm = true;
fd38203a
LP
2426
2427 return 0;
2428}
2429
d6a83dc4
LP
2430_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2431 usec_t t;
2432 int r;
2433
2434 assert_return(s, -EINVAL);
2435 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2436
2437 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2438 if (r < 0)
2439 return r;
2440
2441 if (usec >= USEC_INFINITY - t)
2442 return -EOVERFLOW;
2443
2444 return sd_event_source_set_time(s, t + usec);
2445}
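/* Usage sketch for the relative variant above (handler name hypothetical): arm a monotonic timer
 * five seconds from now without computing the absolute deadline by hand:
 *
 *     r = sd_event_add_time(e, &src, CLOCK_MONOTONIC, UINT64_MAX, 0, on_timer, NULL);
 *     if (r >= 0)
 *             r = sd_event_source_set_time_relative(src, 5 * USEC_PER_SEC);
 */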
2446
f7262a9f 2447_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2448 assert_return(s, -EINVAL);
2449 assert_return(usec, -EINVAL);
6a0f1f6d 2450 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf
LP
2451 assert_return(!event_pid_changed(s->event), -ECHILD);
2452
2453 *usec = s->time.accuracy;
2454 return 0;
2455}
2456
f7262a9f 2457_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
6a0f1f6d 2458 struct clock_data *d;
2a0dc6cd 2459 int r;
6a0f1f6d 2460
305f78bf
LP
2461 assert_return(s, -EINVAL);
2462 assert_return(usec != (uint64_t) -1, -EINVAL);
6a0f1f6d 2463 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2464 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2465 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2466
2a0dc6cd
LP
2467 r = source_set_pending(s, false);
2468 if (r < 0)
2469 return r;
2470
eaa3cbef
LP
2471 if (usec == 0)
2472 usec = DEFAULT_ACCURACY_USEC;
2473
eaa3cbef
LP
2474 s->time.accuracy = usec;
2475
6a0f1f6d
LP
2476 d = event_get_clock_data(s->event, s->type);
2477 assert(d);
2478
2479 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2480 d->needs_rearm = true;
6a0f1f6d
LP
2481
2482 return 0;
2483}
2484
2485_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2486 assert_return(s, -EINVAL);
2487 assert_return(clock, -EINVAL);
2488 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2489 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2490
6a0f1f6d 2491 *clock = event_source_type_to_clock(s->type);
eaa3cbef
LP
2492 return 0;
2493}
2494
f7262a9f 2495_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
4bee8012
LP
2496 assert_return(s, -EINVAL);
2497 assert_return(pid, -EINVAL);
2498 assert_return(s->type == SOURCE_CHILD, -EDOM);
2499 assert_return(!event_pid_changed(s->event), -ECHILD);
2500
2501 *pid = s->child.pid;
2502 return 0;
2503}
2504
f8f3f926
LP
2505_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2506 assert_return(s, -EINVAL);
2507 assert_return(s->type == SOURCE_CHILD, -EDOM);
2508 assert_return(!event_pid_changed(s->event), -ECHILD);
2509
2510 if (s->child.pidfd < 0)
2511 return -EOPNOTSUPP;
2512
2513 return s->child.pidfd;
2514}
2515
2516_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2517 assert_return(s, -EINVAL);
2518 assert_return(s->type == SOURCE_CHILD, -EDOM);
2519 assert_return(!event_pid_changed(s->event), -ECHILD);
2520 assert_return(SIGNAL_VALID(sig), -EINVAL);
2521
2522 /* If we have already seen an indication that the process exited, refuse to send a signal early. This way we
2523 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2524 * available. */
2525 if (s->child.exited)
2526 return -ESRCH;
2527
2528 if (s->child.pidfd >= 0) {
2529 siginfo_t copy;
2530
2531 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2532 * structure here */
2533 if (si)
2534 copy = *si;
2535
2536 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2537 /* Let's propagate the error only if the system call is not implemented or prohibited */
2538 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2539 return -errno;
2540 } else
2541 return 0;
2542 }
2543
2544 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2545 * this here. */
2546 if (flags != 0)
2547 return -EOPNOTSUPP;
2548
2549 if (si) {
2550 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2551 siginfo_t copy = *si;
2552
2553 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2554 return -errno;
2555 } else if (kill(s->child.pid, sig) < 0)
2556 return -errno;
2557
2558 return 0;
2559}
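/* Usage sketch (hypothetical): ask the watched child to terminate; with a pidfd this cannot hit a
 * recycled PID, without one the exited check above is the only guard:
 *
 *     r = sd_event_source_send_child_signal(child_src, SIGTERM, NULL, 0);
 *     if (r == -ESRCH)
 *             ; // the child is already gone, nothing to signal
 */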
2560
2561_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2562 assert_return(s, -EINVAL);
2563 assert_return(s->type == SOURCE_CHILD, -EDOM);
2564
2565 if (s->child.pidfd < 0)
2566 return -EOPNOTSUPP;
2567
2568 return s->child.pidfd_owned;
2569}
2570
2571_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2572 assert_return(s, -EINVAL);
2573 assert_return(s->type == SOURCE_CHILD, -EDOM);
2574
2575 if (s->child.pidfd < 0)
2576 return -EOPNOTSUPP;
2577
2578 s->child.pidfd_owned = own;
2579 return 0;
2580}
2581
2582_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2583 assert_return(s, -EINVAL);
2584 assert_return(s->type == SOURCE_CHILD, -EDOM);
2585
2586 return s->child.process_owned;
2587}
2588
2589_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2590 assert_return(s, -EINVAL);
2591 assert_return(s->type == SOURCE_CHILD, -EDOM);
2592
2593 s->child.process_owned = own;
2594 return 0;
2595}
2596
97ef5391
LP
2597_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2598 assert_return(s, -EINVAL);
2599 assert_return(mask, -EINVAL);
2600 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2601 assert_return(!event_pid_changed(s->event), -ECHILD);
2602
2603 *mask = s->inotify.mask;
2604 return 0;
2605}
2606
718db961 2607_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
fd38203a
LP
2608 int r;
2609
da7e457c 2610 assert_return(s, -EINVAL);
6203e07a 2611 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c
LP
2612 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2613 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2614
2615 if (s->prepare == callback)
2616 return 0;
2617
2618 if (callback && s->prepare) {
2619 s->prepare = callback;
2620 return 0;
2621 }
2622
2623 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2624 if (r < 0)
2625 return r;
2626
2627 s->prepare = callback;
2628
2629 if (callback) {
2630 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2631 if (r < 0)
2632 return r;
2633 } else
2634 prioq_remove(s->event->prepare, s, &s->prepare_index);
2635
2636 return 0;
2637}
2638
f7262a9f 2639_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
da7e457c 2640 assert_return(s, NULL);
fd38203a
LP
2641
2642 return s->userdata;
2643}
2644
8f726607
LP
2645_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2646 void *ret;
2647
2648 assert_return(s, NULL);
2649
2650 ret = s->userdata;
2651 s->userdata = userdata;
2652
2653 return ret;
2654}
2655
c2ba3ad6
LP
2656static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2657 usec_t c;
2658 assert(e);
2659 assert(a <= b);
2660
2661 if (a <= 0)
2662 return 0;
393003e1
LP
2663 if (a >= USEC_INFINITY)
2664 return USEC_INFINITY;
c2ba3ad6
LP
2665
2666 if (b <= a + 1)
2667 return a;
2668
52444dc4
LP
2669 initialize_perturb(e);
2670
c2ba3ad6
LP
2671 /*
2672 Find a good time to wake up again between times a and b. We
2673 have two goals here:
2674
2675 a) We want to wake up as seldom as possible, hence prefer
2676 later times over earlier times.
2677
2678 b) But if we have to wake up, then let's make sure to
2679 dispatch as much as possible on the entire system.
2680
2681 We implement this by waking up everywhere at the same time
850516e0 2682 within any given minute if we can, synchronised via the
c2ba3ad6 2683 perturbation value determined from the boot ID. If we can't,
ba276c81
LP
2684 then we try to find the same spot within every 10s window,
2685 then within every 1s and finally every 250ms window.
2686 Otherwise, we pick the last possible time to wake up.
c2ba3ad6
LP
2687 */
2688
850516e0
LP
2689 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2690 if (c >= b) {
2691 if (_unlikely_(c < USEC_PER_MINUTE))
2692 return b;
2693
2694 c -= USEC_PER_MINUTE;
2695 }
2696
ba276c81
LP
2697 if (c >= a)
2698 return c;
2699
2700 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2701 if (c >= b) {
2702 if (_unlikely_(c < USEC_PER_SEC*10))
2703 return b;
2704
2705 c -= USEC_PER_SEC*10;
2706 }
2707
850516e0
LP
2708 if (c >= a)
2709 return c;
2710
2711 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
c2ba3ad6
LP
2712 if (c >= b) {
2713 if (_unlikely_(c < USEC_PER_SEC))
2714 return b;
2715
2716 c -= USEC_PER_SEC;
2717 }
2718
2719 if (c >= a)
2720 return c;
2721
2722 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2723 if (c >= b) {
2724 if (_unlikely_(c < USEC_PER_MSEC*250))
2725 return b;
2726
2727 c -= USEC_PER_MSEC*250;
2728 }
2729
2730 if (c >= a)
2731 return c;
2732
2733 return b;
2734}
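/* Worked example for the search above (values illustrative): with a = 70s, b = 130s and
 * perturb = 45s, the minute-granularity candidate is (130/60)*60 + 45 = 165s; that is >= b, so one
 * minute is subtracted, yielding 105s, which lies within [a, b] and is returned. All event loops
 * sharing the boot ID thus wake at second 45 of the minute. */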
2735
fd38203a
LP
2736static int event_arm_timer(
2737 sd_event *e,
6a0f1f6d 2738 struct clock_data *d) {
fd38203a
LP
2739
2740 struct itimerspec its = {};
c2ba3ad6
LP
2741 sd_event_source *a, *b;
2742 usec_t t;
fd38203a
LP
2743 int r;
2744
cde93897 2745 assert(e);
6a0f1f6d 2746 assert(d);
fd38203a 2747
d06441da 2748 if (!d->needs_rearm)
212bbb17
TG
2749 return 0;
2750 else
2751 d->needs_rearm = false;
2752
6a0f1f6d 2753 a = prioq_peek(d->earliest);
393003e1 2754 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
72aedc1e 2755
6a0f1f6d 2756 if (d->fd < 0)
c57b5ca3
LP
2757 return 0;
2758
3a43da28 2759 if (d->next == USEC_INFINITY)
72aedc1e
LP
2760 return 0;
2761
2762 /* disarm */
6a0f1f6d 2763 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
72aedc1e
LP
2764 if (r < 0)
2765 return r;
2766
3a43da28 2767 d->next = USEC_INFINITY;
fd38203a 2768 return 0;
72aedc1e 2769 }
fd38203a 2770
6a0f1f6d 2771 b = prioq_peek(d->latest);
baf76283 2772 assert_se(b && b->enabled != SD_EVENT_OFF);
c2ba3ad6 2773
1bce0ffa 2774 t = sleep_between(e, a->time.next, time_event_source_latest(b));
6a0f1f6d 2775 if (d->next == t)
fd38203a
LP
2776 return 0;
2777
6a0f1f6d 2778 assert_se(d->fd >= 0);
fd38203a 2779
c2ba3ad6 2780 if (t == 0) {
fd38203a
LP
2781 /* We don't want to disarm here, just set some time looooong ago. */
2782 its.it_value.tv_sec = 0;
2783 its.it_value.tv_nsec = 1;
2784 } else
c2ba3ad6 2785 timespec_store(&its.it_value, t);
fd38203a 2786
6a0f1f6d 2787 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
fd38203a 2788 if (r < 0)
cde93897 2789 return -errno;
fd38203a 2790
6a0f1f6d 2791 d->next = t;
fd38203a
LP
2792 return 0;
2793}
2794
9a800b56 2795static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
fd38203a
LP
2796 assert(e);
2797 assert(s);
2798 assert(s->type == SOURCE_IO);
2799
9a800b56
LP
2800 /* If the event source was already pending, we just OR in the
2801 * new revents, otherwise we reset the value. The ORing is
2802 * necessary to handle EPOLLONESHOT events properly where
2803 * readability might happen independently of writability, and
2804 * we need to keep track of both */
2805
2806 if (s->pending)
2807 s->io.revents |= revents;
2808 else
2809 s->io.revents = revents;
fd38203a 2810
fd38203a
LP
2811 return source_set_pending(s, true);
2812}
2813
72aedc1e 2814static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
fd38203a
LP
2815 uint64_t x;
2816 ssize_t ss;
2817
2818 assert(e);
da7e457c 2819 assert(fd >= 0);
72aedc1e 2820
305f78bf 2821 assert_return(events == EPOLLIN, -EIO);
fd38203a
LP
2822
2823 ss = read(fd, &x, sizeof(x));
2824 if (ss < 0) {
945c2931 2825 if (IN_SET(errno, EAGAIN, EINTR))
fd38203a
LP
2826 return 0;
2827
2828 return -errno;
2829 }
2830
8d35dae7 2831 if (_unlikely_(ss != sizeof(x)))
fd38203a
LP
2832 return -EIO;
2833
cde93897 2834 if (next)
3a43da28 2835 *next = USEC_INFINITY;
72aedc1e 2836
fd38203a
LP
2837 return 0;
2838}
2839
305f78bf
LP
2840static int process_timer(
2841 sd_event *e,
2842 usec_t n,
6a0f1f6d 2843 struct clock_data *d) {
305f78bf 2844
fd38203a
LP
2845 sd_event_source *s;
2846 int r;
2847
2848 assert(e);
6a0f1f6d 2849 assert(d);
fd38203a
LP
2850
2851 for (;;) {
6a0f1f6d 2852 s = prioq_peek(d->earliest);
fd38203a
LP
2853 if (!s ||
2854 s->time.next > n ||
baf76283 2855 s->enabled == SD_EVENT_OFF ||
fd38203a
LP
2856 s->pending)
2857 break;
2858
2859 r = source_set_pending(s, true);
2860 if (r < 0)
2861 return r;
2862
6a0f1f6d
LP
2863 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2864 prioq_reshuffle(d->latest, s, &s->time.latest_index);
e07bbb7c 2865 d->needs_rearm = true;
fd38203a
LP
2866 }
2867
2868 return 0;
2869}
2870
2871static int process_child(sd_event *e) {
2872 sd_event_source *s;
fd38203a
LP
2873 int r;
2874
2875 assert(e);
2876
c2ba3ad6
LP
2877 e->need_process_child = false;
2878
fd38203a
LP
2879 /*
2880 So, this is ugly. We iteratively invoke waitid() with P_PID
2881 + WNOHANG for each PID we wait for, instead of using
2882 P_ALL. This is because we only want to get child
2883 information of very specific child processes, and not all
2884 of them. We might not have processed the SIGCHLD event of a
2885 previous invocation and we don't want to maintain an
2886 unbounded *per-child* event queue, hence we really don't
2887 want anything flushed out of the kernel's queue that we
2888 don't care about. Since this is O(n) this means that if you
2889 have a lot of processes you probably want to handle SIGCHLD
2890 yourself.
08cd1552
LP
2891
2892 We do not reap the children here (by using WNOWAIT), this
2893 is only done after the event source is dispatched so that
2894 the callback still sees the process as a zombie.
fd38203a
LP
2895 */
2896
90e74a66 2897 HASHMAP_FOREACH(s, e->child_sources) {
fd38203a
LP
2898 assert(s->type == SOURCE_CHILD);
2899
2900 if (s->pending)
2901 continue;
2902
baf76283 2903 if (s->enabled == SD_EVENT_OFF)
fd38203a
LP
2904 continue;
2905
f8f3f926
LP
2906 if (s->child.exited)
2907 continue;
2908
2909 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
2910 continue;
2911
fd38203a 2912 zero(s->child.siginfo);
08cd1552
LP
2913 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2914 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
fd38203a
LP
2915 if (r < 0)
2916 return -errno;
2917
2918 if (s->child.siginfo.si_pid != 0) {
945c2931 2919 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 2920
f8f3f926
LP
2921 if (zombie)
2922 s->child.exited = true;
2923
08cd1552
LP
2924 if (!zombie && (s->child.options & WEXITED)) {
2925 /* If the child isn't dead then let's
2926 * immediately remove the state change
2927 * from the queue, since there's no
2928 * benefit in leaving it queued */
2929
2930 assert(s->child.options & (WSTOPPED|WCONTINUED));
a5d27871 2931 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
08cd1552
LP
2932 }
2933
fd38203a
LP
2934 r = source_set_pending(s, true);
2935 if (r < 0)
2936 return r;
2937 }
2938 }
2939
fd38203a
LP
2940 return 0;
2941}
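/* Timeline sketch of the two-step reaping above (illustrative): SIGCHLD arrives, process_child()
 * calls waitid() with WNOHANG|WNOWAIT so the child stays a zombie while its siginfo is captured;
 * only after the user callback ran does source_dispatch() issue the final
 * waitid(..., WNOHANG|WEXITED) that actually reaps it. */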
2942
f8f3f926
LP
2943static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2944 assert(e);
2945 assert(s);
2946 assert(s->type == SOURCE_CHILD);
2947
2948 if (s->pending)
2949 return 0;
2950
2951 if (s->enabled == SD_EVENT_OFF)
2952 return 0;
2953
2954 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2955 return 0;
2956
2957 zero(s->child.siginfo);
2958 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2959 return -errno;
2960
2961 if (s->child.siginfo.si_pid == 0)
2962 return 0;
2963
2964 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2965 s->child.exited = true;
2966
2967 return source_set_pending(s, true);
2968}
2969
9da4cb2b 2970static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
fd38203a 2971 bool read_one = false;
fd38203a
LP
2972 int r;
2973
da7e457c 2974 assert(e);
97ef5391 2975 assert(d);
305f78bf 2976 assert_return(events == EPOLLIN, -EIO);
fd38203a 2977
9da4cb2b
LP
2978 /* If there's a signal queued on this priority and SIGCHLD is
2979 on this priority too, then make sure to recheck the
2980 children we watch. This is because we only ever dequeue
2981 the first signal per priority, and if we dequeue one then
2982 SIGCHLD might still be queued behind it without us knowing,
2983 while we might have higher priority children we care about,
2984 hence we need to check that explicitly. */
2985
2986 if (sigismember(&d->sigset, SIGCHLD))
2987 e->need_process_child = true;
2988
2989 /* If there's already an event source pending for this
2990 * priority we don't read another */
2991 if (d->current)
2992 return 0;
2993
fd38203a 2994 for (;;) {
0eb2e0e3 2995 struct signalfd_siginfo si;
7057bd99 2996 ssize_t n;
92daebc0 2997 sd_event_source *s = NULL;
fd38203a 2998
9da4cb2b 2999 n = read(d->fd, &si, sizeof(si));
7057bd99 3000 if (n < 0) {
945c2931 3001 if (IN_SET(errno, EAGAIN, EINTR))
fd38203a
LP
3002 return read_one;
3003
3004 return -errno;
3005 }
3006
7057bd99 3007 if (_unlikely_(n != sizeof(si)))
fd38203a
LP
3008 return -EIO;
3009
6eb7c172 3010 assert(SIGNAL_VALID(si.ssi_signo));
7057bd99 3011
fd38203a
LP
3012 read_one = true;
3013
92daebc0
LP
3014 if (e->signal_sources)
3015 s = e->signal_sources[si.ssi_signo];
92daebc0
LP
3016 if (!s)
3017 continue;
9da4cb2b
LP
3018 if (s->pending)
3019 continue;
fd38203a
LP
3020
3021 s->signal.siginfo = si;
9da4cb2b
LP
3022 d->current = s;
3023
fd38203a
LP
3024 r = source_set_pending(s, true);
3025 if (r < 0)
3026 return r;
9da4cb2b
LP
3027
3028 return 1;
fd38203a 3029 }
fd38203a
LP
3030}
3031
97ef5391
LP
3032static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3033 ssize_t n;
3034
3035 assert(e);
3036 assert(d);
3037
3038 assert_return(revents == EPOLLIN, -EIO);
3039
3040 /* If there's already an event source pending for this priority, don't read another */
3041 if (d->n_pending > 0)
3042 return 0;
3043
3044 /* Is the read buffer non-empty? If so, let's not read more */
3045 if (d->buffer_filled > 0)
3046 return 0;
3047
3048 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3049 if (n < 0) {
3050 if (IN_SET(errno, EAGAIN, EINTR))
3051 return 0;
3052
3053 return -errno;
3054 }
3055
3056 assert(n > 0);
3057 d->buffer_filled = (size_t) n;
3058 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3059
3060 return 1;
3061}
3062
3063static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3064 assert(e);
3065 assert(d);
3066 assert(sz <= d->buffer_filled);
3067
3068 if (sz == 0)
3069 return;
3070
3071 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3072 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3073 d->buffer_filled -= sz;
3074
3075 if (d->buffer_filled == 0)
3076 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3077}
3078
3079static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3080 int r;
3081
3082 assert(e);
3083 assert(d);
3084
3085 /* If there's already an event source pending for this priority, don't read another */
3086 if (d->n_pending > 0)
3087 return 0;
3088
3089 while (d->buffer_filled > 0) {
3090 size_t sz;
3091
3092 /* Let's validate that the event structures are complete */
3093 if (d->buffer_filled < offsetof(struct inotify_event, name))
3094 return -EIO;
3095
3096 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3097 if (d->buffer_filled < sz)
3098 return -EIO;
3099
3100 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3101 struct inode_data *inode_data;
97ef5391
LP
3102
3103 /* The queue overran, let's pass this event to all event sources connected to this inotify
3104 * object */
3105
90e74a66 3106 HASHMAP_FOREACH(inode_data, d->inodes) {
97ef5391
LP
3107 sd_event_source *s;
3108
3109 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3110
3111 if (s->enabled == SD_EVENT_OFF)
3112 continue;
3113
3114 r = source_set_pending(s, true);
3115 if (r < 0)
3116 return r;
3117 }
3118 }
3119 } else {
3120 struct inode_data *inode_data;
3121 sd_event_source *s;
3122
3123 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3124 * our watch descriptor table. */
3125 if (d->buffer.ev.mask & IN_IGNORED) {
3126
3127 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3128 if (!inode_data) {
3129 event_inotify_data_drop(e, d, sz);
3130 continue;
3131 }
3132
3133 /* The watch descriptor was removed by the kernel, let's drop it here too */
3134 inode_data->wd = -1;
3135 } else {
3136 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3137 if (!inode_data) {
3138 event_inotify_data_drop(e, d, sz);
3139 continue;
3140 }
3141 }
3142
3143 /* Trigger all event sources that are interested in these events. Also trigger all event
3144 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3145 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3146
3147 if (s->enabled == SD_EVENT_OFF)
3148 continue;
3149
3150 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3151 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3152 continue;
3153
3154 r = source_set_pending(s, true);
3155 if (r < 0)
3156 return r;
3157 }
3158 }
3159
3160 /* Something pending now? If so, let's finish, otherwise let's read more. */
3161 if (d->n_pending > 0)
3162 return 1;
3163 }
3164
3165 return 0;
3166}
3167
3168static int process_inotify(sd_event *e) {
3169 struct inotify_data *d;
3170 int r, done = 0;
3171
3172 assert(e);
3173
3174 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3175 r = event_inotify_data_process(e, d);
3176 if (r < 0)
3177 return r;
3178 if (r > 0)
3179 done++;
3180 }
3181
3182 return done;
3183}
3184
fd38203a 3185static int source_dispatch(sd_event_source *s) {
8f5c235d 3186 EventSourceType saved_type;
fe8245eb 3187 int r = 0;
fd38203a
LP
3188
3189 assert(s);
6203e07a 3190 assert(s->pending || s->type == SOURCE_EXIT);
fd38203a 3191
8f5c235d
LP
3192 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3193 * the event. */
3194 saved_type = s->type;
3195
945c2931 3196 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
da7e457c
LP
3197 r = source_set_pending(s, false);
3198 if (r < 0)
3199 return r;
3200 }
fd38203a 3201
6e9feda3
LP
3202 if (s->type != SOURCE_POST) {
3203 sd_event_source *z;
6e9feda3
LP
3204
3205 /* If we execute a non-post source, let's mark all
3206 * post sources as pending */
3207
90e74a66 3208 SET_FOREACH(z, s->event->post_sources) {
6e9feda3
LP
3209 if (z->enabled == SD_EVENT_OFF)
3210 continue;
3211
3212 r = source_set_pending(z, true);
3213 if (r < 0)
3214 return r;
3215 }
3216 }
3217
baf76283
LP
3218 if (s->enabled == SD_EVENT_ONESHOT) {
3219 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
fd38203a
LP
3220 if (r < 0)
3221 return r;
3222 }
3223
12179984 3224 s->dispatching = true;
b7484e2a 3225
fd38203a
LP
3226 switch (s->type) {
3227
3228 case SOURCE_IO:
3229 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3230 break;
3231
6a0f1f6d 3232 case SOURCE_TIME_REALTIME:
a8548816 3233 case SOURCE_TIME_BOOTTIME:
6a0f1f6d
LP
3234 case SOURCE_TIME_MONOTONIC:
3235 case SOURCE_TIME_REALTIME_ALARM:
3236 case SOURCE_TIME_BOOTTIME_ALARM:
fd38203a
LP
3237 r = s->time.callback(s, s->time.next, s->userdata);
3238 break;
3239
3240 case SOURCE_SIGNAL:
3241 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3242 break;
3243
08cd1552
LP
3244 case SOURCE_CHILD: {
3245 bool zombie;
3246
945c2931 3247 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3248
fd38203a 3249 r = s->child.callback(s, &s->child.siginfo, s->userdata);
08cd1552
LP
3250
3251 /* Now, reap the PID for good. */
f8f3f926 3252 if (zombie) {
cc59d290 3253 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
f8f3f926
LP
3254 s->child.waited = true;
3255 }
08cd1552 3256
fd38203a 3257 break;
08cd1552 3258 }
fd38203a
LP
3259
3260 case SOURCE_DEFER:
3261 r = s->defer.callback(s, s->userdata);
3262 break;
da7e457c 3263
6e9feda3
LP
3264 case SOURCE_POST:
3265 r = s->post.callback(s, s->userdata);
3266 break;
3267
6203e07a
LP
3268 case SOURCE_EXIT:
3269 r = s->exit.callback(s, s->userdata);
da7e457c 3270 break;
9d3e3aa5 3271
97ef5391
LP
3272 case SOURCE_INOTIFY: {
3273 struct sd_event *e = s->event;
3274 struct inotify_data *d;
3275 size_t sz;
3276
3277 assert(s->inotify.inode_data);
3278 assert_se(d = s->inotify.inode_data->inotify_data);
3279
3280 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3281 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3282 assert(d->buffer_filled >= sz);
3283
3284 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3285
3286 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3287 * buffer. */
3288 if (d->n_pending == 0)
3289 event_inotify_data_drop(e, d, sz);
3290
3291 break;
3292 }
3293
9d3e3aa5 3294 case SOURCE_WATCHDOG:
a71fe8b8 3295 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
9f2a50a3 3296 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
9d3e3aa5 3297 assert_not_reached("Wut? I shouldn't exist.");
fd38203a
LP
3298 }
3299
12179984
LP
3300 s->dispatching = false;
3301
55cbfaa5
DM
3302 if (r < 0)
3303 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
8f5c235d 3304 strna(s->description), event_source_type_to_string(saved_type));
12179984
LP
3305
3306 if (s->n_ref == 0)
3307 source_free(s);
3308 else if (r < 0)
6203e07a 3309 sd_event_source_set_enabled(s, SD_EVENT_OFF);
b7484e2a 3310
6203e07a 3311 return 1;
fd38203a
LP
3312}
3313
3314static int event_prepare(sd_event *e) {
3315 int r;
3316
3317 assert(e);
3318
3319 for (;;) {
3320 sd_event_source *s;
3321
3322 s = prioq_peek(e->prepare);
baf76283 3323 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
fd38203a
LP
3324 break;
3325
3326 s->prepare_iteration = e->iteration;
3327 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3328 if (r < 0)
3329 return r;
3330
3331 assert(s->prepare);
12179984
LP
3332
3333 s->dispatching = true;
fd38203a 3334 r = s->prepare(s, s->userdata);
12179984
LP
3335 s->dispatching = false;
3336
55cbfaa5
DM
3337 if (r < 0)
3338 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3339 strna(s->description), event_source_type_to_string(s->type));
fd38203a 3340
12179984
LP
3341 if (s->n_ref == 0)
3342 source_free(s);
3343 else if (r < 0)
3344 sd_event_source_set_enabled(s, SD_EVENT_OFF);
fd38203a
LP
3345 }
3346
3347 return 0;
3348}
3349
6203e07a 3350static int dispatch_exit(sd_event *e) {
da7e457c 3351 sd_event_source *p;
30dd293c 3352 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
da7e457c
LP
3353 int r;
3354
3355 assert(e);
3356
6203e07a 3357 p = prioq_peek(e->exit);
baf76283 3358 if (!p || p->enabled == SD_EVENT_OFF) {
da7e457c
LP
3359 e->state = SD_EVENT_FINISHED;
3360 return 0;
3361 }
3362
30dd293c 3363 ref = sd_event_ref(e);
da7e457c 3364 e->iteration++;
6203e07a 3365 e->state = SD_EVENT_EXITING;
da7e457c 3366 r = source_dispatch(p);
2b0c9ef7 3367 e->state = SD_EVENT_INITIAL;
da7e457c
LP
3368 return r;
3369}
3370
c2ba3ad6
LP
3371static sd_event_source* event_next_pending(sd_event *e) {
3372 sd_event_source *p;
3373
da7e457c
LP
3374 assert(e);
3375
c2ba3ad6
LP
3376 p = prioq_peek(e->pending);
3377 if (!p)
3378 return NULL;
3379
baf76283 3380 if (p->enabled == SD_EVENT_OFF)
c2ba3ad6
LP
3381 return NULL;
3382
3383 return p;
3384}
3385
cde93897
LP
3386static int arm_watchdog(sd_event *e) {
3387 struct itimerspec its = {};
3388 usec_t t;
3389 int r;
3390
3391 assert(e);
3392 assert(e->watchdog_fd >= 0);
3393
3394 t = sleep_between(e,
3395 e->watchdog_last + (e->watchdog_period / 2),
3396 e->watchdog_last + (e->watchdog_period * 3 / 4));
3397
3398 timespec_store(&its.it_value, t);
3399
75145780
LP
3400 /* Make sure we never set the watchdog to 0, which tells the
3401 * kernel to disable it. */
3402 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3403 its.it_value.tv_nsec = 1;
3404
cde93897
LP
3405 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3406 if (r < 0)
3407 return -errno;
3408
3409 return 0;
3410}
3411
3412static int process_watchdog(sd_event *e) {
3413 assert(e);
3414
3415 if (!e->watchdog)
3416 return 0;
3417
3418 /* Don't notify watchdog too often */
3419 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3420 return 0;
3421
3422 sd_notify(false, "WATCHDOG=1");
3423 e->watchdog_last = e->timestamp.monotonic;
3424
3425 return arm_watchdog(e);
3426}
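/* Worked example for the two functions above (numbers illustrative): with WATCHDOG_USEC=30s the
 * period is 30s, so arm_watchdog() schedules the next wakeup into the coalesced spot between
 * last+15s and last+22.5s, while process_watchdog() rate-limits notifications to at most one per
 * 7.5s (a quarter period). */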
3427
97ef5391
LP
3428static void event_close_inode_data_fds(sd_event *e) {
3429 struct inode_data *d;
3430
3431 assert(e);
3432
3433 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3434 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
5238e957 3435 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
97ef5391
LP
3436 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3437 * compromise. */
3438
3439 while ((d = e->inode_data_to_close)) {
3440 assert(d->fd >= 0);
3441 d->fd = safe_close(d->fd);
3442
3443 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3444 }
3445}
3446
c45a5a74
TG
3447_public_ int sd_event_prepare(sd_event *e) {
3448 int r;
fd38203a 3449
da7e457c 3450 assert_return(e, -EINVAL);
b937d761 3451 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
3452 assert_return(!event_pid_changed(e), -ECHILD);
3453 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3454 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3455
e5446015
LP
3456 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3457 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3458 * syscalls */
3459 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3460
6203e07a 3461 if (e->exit_requested)
c45a5a74 3462 goto pending;
fd38203a
LP
3463
3464 e->iteration++;
3465
0be6c2f6 3466 e->state = SD_EVENT_PREPARING;
fd38203a 3467 r = event_prepare(e);
0be6c2f6 3468 e->state = SD_EVENT_INITIAL;
fd38203a 3469 if (r < 0)
c45a5a74 3470 return r;
fd38203a 3471
6a0f1f6d
LP
3472 r = event_arm_timer(e, &e->realtime);
3473 if (r < 0)
c45a5a74 3474 return r;
6a0f1f6d 3475
a8548816
TG
3476 r = event_arm_timer(e, &e->boottime);
3477 if (r < 0)
c45a5a74 3478 return r;
a8548816 3479
6a0f1f6d
LP
3480 r = event_arm_timer(e, &e->monotonic);
3481 if (r < 0)
c45a5a74 3482 return r;
6a0f1f6d
LP
3483
3484 r = event_arm_timer(e, &e->realtime_alarm);
1b5995b0 3485 if (r < 0)
c45a5a74 3486 return r;
fd38203a 3487
6a0f1f6d 3488 r = event_arm_timer(e, &e->boottime_alarm);
1b5995b0 3489 if (r < 0)
c45a5a74 3490 return r;
fd38203a 3491
97ef5391
LP
3492 event_close_inode_data_fds(e);
3493
1b5995b0 3494 if (event_next_pending(e) || e->need_process_child)
c45a5a74
TG
3495 goto pending;
3496
2b0c9ef7 3497 e->state = SD_EVENT_ARMED;
c45a5a74
TG
3498
3499 return 0;
3500
3501pending:
2b0c9ef7 3502 e->state = SD_EVENT_ARMED;
6d148a84
TG
3503 r = sd_event_wait(e, 0);
3504 if (r == 0)
2b0c9ef7 3505 e->state = SD_EVENT_ARMED;
6d148a84
TG
3506
3507 return r;
c45a5a74
TG
3508}
3509
3510_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
5cddd924 3511 size_t event_queue_max;
c45a5a74
TG
3512 int r, m, i;
3513
3514 assert_return(e, -EINVAL);
b937d761 3515 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74
TG
3516 assert_return(!event_pid_changed(e), -ECHILD);
3517 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3518 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
c45a5a74
TG
3519
3520 if (e->exit_requested) {
3521 e->state = SD_EVENT_PENDING;
3522 return 1;
3523 }
6a0f1f6d 3524
5cddd924
LP
3525 event_queue_max = MAX(e->n_sources, 1u);
3526 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3527 return -ENOMEM;
fd38203a 3528
97ef5391
LP
3529 /* If we still have inotify data buffered, then query the other fds, but don't wait on them */
3530 if (e->inotify_data_buffered)
3531 timeout = 0;
3532
5cddd924 3533 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
bab4820e 3534 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
da7e457c 3535 if (m < 0) {
c45a5a74
TG
3536 if (errno == EINTR) {
3537 e->state = SD_EVENT_PENDING;
3538 return 1;
3539 }
3540
3541 r = -errno;
da7e457c
LP
3542 goto finish;
3543 }
fd38203a 3544
e475d10c 3545 triple_timestamp_get(&e->timestamp);
fd38203a
LP
3546
3547 for (i = 0; i < m; i++) {
3548
5cddd924
LP
3549 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3550 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
9da4cb2b 3551 else {
5cddd924 3552 WakeupType *t = e->event_queue[i].data.ptr;
9da4cb2b
LP
3553
3554 switch (*t) {
3555
f8f3f926 3556 case WAKEUP_EVENT_SOURCE: {
5cddd924 3557 sd_event_source *s = e->event_queue[i].data.ptr;
f8f3f926
LP
3558
3559 assert(s);
3560
3561 switch (s->type) {
3562
3563 case SOURCE_IO:
5cddd924 3564 r = process_io(e, s, e->event_queue[i].events);
f8f3f926
LP
3565 break;
3566
3567 case SOURCE_CHILD:
5cddd924 3568 r = process_pidfd(e, s, e->event_queue[i].events);
f8f3f926
LP
3569 break;
3570
3571 default:
3572 assert_not_reached("Unexpected event source type");
3573 }
3574
9da4cb2b 3575 break;
f8f3f926 3576 }
fd38203a 3577
9da4cb2b 3578 case WAKEUP_CLOCK_DATA: {
5cddd924 3579 struct clock_data *d = e->event_queue[i].data.ptr;
f8f3f926
LP
3580
3581 assert(d);
3582
5cddd924 3583 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
9da4cb2b
LP
3584 break;
3585 }
3586
3587 case WAKEUP_SIGNAL_DATA:
5cddd924 3588 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
9da4cb2b
LP
3589 break;
3590
97ef5391 3591 case WAKEUP_INOTIFY_DATA:
5cddd924 3592 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
97ef5391
LP
3593 break;
3594
9da4cb2b
LP
3595 default:
3596 assert_not_reached("Invalid wake-up pointer");
3597 }
3598 }
fd38203a 3599 if (r < 0)
da7e457c 3600 goto finish;
fd38203a
LP
3601 }
3602
cde93897
LP
3603 r = process_watchdog(e);
3604 if (r < 0)
3605 goto finish;
3606
6a0f1f6d
LP
3607 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3608 if (r < 0)
3609 goto finish;
3610
e475d10c 3611 r = process_timer(e, e->timestamp.boottime, &e->boottime);
a8548816
TG
3612 if (r < 0)
3613 goto finish;
3614
6a0f1f6d
LP
3615 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3616 if (r < 0)
3617 goto finish;
3618
3619 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
fd38203a 3620 if (r < 0)
da7e457c 3621 goto finish;
fd38203a 3622
e475d10c 3623 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
fd38203a 3624 if (r < 0)
da7e457c 3625 goto finish;
fd38203a 3626
c2ba3ad6 3627 if (e->need_process_child) {
fd38203a
LP
3628 r = process_child(e);
3629 if (r < 0)
da7e457c 3630 goto finish;
fd38203a
LP
3631 }
3632
97ef5391
LP
3633 r = process_inotify(e);
3634 if (r < 0)
3635 goto finish;
3636
c45a5a74
TG
3637 if (event_next_pending(e)) {
3638 e->state = SD_EVENT_PENDING;
3639
3640 return 1;
da7e457c
LP
3641 }
3642
c45a5a74 3643 r = 0;
fd38203a 3644
da7e457c 3645finish:
2b0c9ef7 3646 e->state = SD_EVENT_INITIAL;
da7e457c
LP
3647
3648 return r;
fd38203a
LP
3649}
3650
c45a5a74
TG
3651_public_ int sd_event_dispatch(sd_event *e) {
3652 sd_event_source *p;
3653 int r;
3654
3655 assert_return(e, -EINVAL);
b937d761 3656 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74
TG
3657 assert_return(!event_pid_changed(e), -ECHILD);
3658 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3659 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3660
3661 if (e->exit_requested)
3662 return dispatch_exit(e);
3663
3664 p = event_next_pending(e);
3665 if (p) {
30dd293c 3666 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
c45a5a74 3667
30dd293c 3668 ref = sd_event_ref(e);
c45a5a74
TG
3669 e->state = SD_EVENT_RUNNING;
3670 r = source_dispatch(p);
2b0c9ef7 3671 e->state = SD_EVENT_INITIAL;
c45a5a74
TG
3672 return r;
3673 }
3674
2b0c9ef7 3675 e->state = SD_EVENT_INITIAL;
c45a5a74
TG
3676
3677 return 1;
3678}
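/* The three public calls above compose into a single loop iteration; a hand-rolled equivalent of
 * sd_event_run() (sketch, error handling abbreviated) would be:
 *
 *     r = sd_event_prepare(e);            // 0 means nothing is pending yet
 *     if (r == 0)
 *             r = sd_event_wait(e, timeout);
 *     if (r > 0)
 *             r = sd_event_dispatch(e);   // dispatches exactly one event source
 */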
3679
34b87517 3680static void event_log_delays(sd_event *e) {
442ac269
YW
3681 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3682 size_t l, i;
34b87517 3683
442ac269
YW
3684 p = b;
3685 l = sizeof(b);
3686 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3687 l = strpcpyf(&p, l, "%u ", e->delays[i]);
34b87517
VC
3688 e->delays[i] = 0;
3689 }
442ac269 3690 log_debug("Event loop iterations: %s", b);
34b87517
VC
3691}
3692
c45a5a74
TG
3693_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3694 int r;
3695
3696 assert_return(e, -EINVAL);
b937d761 3697 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74
TG
3698 assert_return(!event_pid_changed(e), -ECHILD);
3699 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3700 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
c45a5a74 3701
34b87517
VC
3702 if (e->profile_delays && e->last_run) {
3703 usec_t this_run;
3704 unsigned l;
3705
3706 this_run = now(CLOCK_MONOTONIC);
3707
3708 l = u64log2(this_run - e->last_run);
3709 assert(l < sizeof(e->delays));
3710 e->delays[l]++;
3711
3712 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3713 event_log_delays(e);
3714 e->last_log = this_run;
3715 }
3716 }
3717
c45a5a74 3718 r = sd_event_prepare(e);
53bac4e0
LP
3719 if (r == 0)
3720 /* There was nothing? Then wait... */
3721 r = sd_event_wait(e, timeout);
c45a5a74 3722
34b87517
VC
3723 if (e->profile_delays)
3724 e->last_run = now(CLOCK_MONOTONIC);
3725
02d30981 3726 if (r > 0) {
53bac4e0 3727 /* There's something now, then let's dispatch it */
02d30981
TG
3728 r = sd_event_dispatch(e);
3729 if (r < 0)
3730 return r;
53bac4e0
LP
3731
3732 return 1;
3733 }
3734
3735 return r;
c45a5a74
TG
3736}
3737
f7262a9f 3738_public_ int sd_event_loop(sd_event *e) {
30dd293c 3739 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
fd38203a
LP
3740 int r;
3741
da7e457c 3742 assert_return(e, -EINVAL);
b937d761 3743 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 3744 assert_return(!event_pid_changed(e), -ECHILD);
2b0c9ef7 3745 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3746
30dd293c 3747 ref = sd_event_ref(e);
fd38203a 3748
da7e457c 3749 while (e->state != SD_EVENT_FINISHED) {
fd38203a
LP
3750 r = sd_event_run(e, (uint64_t) -1);
3751 if (r < 0)
30dd293c 3752 return r;
fd38203a
LP
3753 }
3754
30dd293c 3755 return e->exit_code;
fd38203a
LP
3756}
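/* Minimal end-to-end sketch built on the call above (handler name hypothetical):
 *
 *     sd_event *e = NULL;
 *     r = sd_event_default(&e);
 *     if (r >= 0)
 *             r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now(CLOCK_MONOTONIC) + USEC_PER_SEC, 0, on_timer, NULL);
 *     if (r >= 0)
 *             r = sd_event_loop(e);       // runs until something calls sd_event_exit()
 *     sd_event_unref(e);
 */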
3757
9b364545
TG
3758_public_ int sd_event_get_fd(sd_event *e) {
3759
3760 assert_return(e, -EINVAL);
b937d761 3761 assert_return(e = event_resolve(e), -ENOPKG);
9b364545
TG
3762 assert_return(!event_pid_changed(e), -ECHILD);
3763
3764 return e->epoll_fd;
3765}
3766
f7262a9f 3767_public_ int sd_event_get_state(sd_event *e) {
da7e457c 3768 assert_return(e, -EINVAL);
b937d761 3769 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
3770 assert_return(!event_pid_changed(e), -ECHILD);
3771
3772 return e->state;
3773}
3774
6203e07a 3775_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
da7e457c 3776 assert_return(e, -EINVAL);
b937d761 3777 assert_return(e = event_resolve(e), -ENOPKG);
6203e07a 3778 assert_return(code, -EINVAL);
da7e457c 3779 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 3780
6203e07a
LP
3781 if (!e->exit_requested)
3782 return -ENODATA;
3783
3784 *code = e->exit_code;
3785 return 0;
fd38203a
LP
3786}
3787
6203e07a 3788_public_ int sd_event_exit(sd_event *e, int code) {
da7e457c 3789 assert_return(e, -EINVAL);
b937d761 3790 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
3791 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3792 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 3793
6203e07a
LP
3794 e->exit_requested = true;
3795 e->exit_code = code;
3796
fd38203a
LP
3797 return 0;
3798}
46e8c825 3799
_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

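/* Usage sketch (illustrative; "on_timer" is a hypothetical callback): because
 * sd_event_now() returns the timestamp cached when the current iteration woke
 * up, it is the right base for arming relative timers, so that all sources
 * scheduled within one iteration agree on "now":
 *
 *         uint64_t usec;
 *
 *         r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                               usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 *         if (r < 0)
 *                 return r;
 */
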
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

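/* Behavior sketch (assuming default_event is thread-local, as set up earlier
 * in this file): the first call on a thread allocates the default loop and
 * returns 1; subsequent calls on the same thread take an extra reference to
 * the same object and return 0:
 *
 *         sd_event *a = NULL, *b = NULL;
 *
 *         assert_se(sd_event_default(&a) == 1);
 *         assert_se(sd_event_default(&b) == 0);
 *         assert_se(a == b);
 *
 *         sd_event_unref(a);
 *         sd_event_unref(b);
 */
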
_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

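/* Usage sketch (illustrative): only loops created through sd_event_default()
 * carry a tid, so -ENXIO is the expected result for loops from sd_event_new().
 * One plausible use is asserting that a loop is driven from its own thread:
 *
 *         pid_t tid;
 *
 *         if (sd_event_get_tid(e, &tid) >= 0)
 *                 assert_se(tid == gettid());
 */
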
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

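/* Usage sketch (illustrative): with WatchdogSec= set in the service unit,
 * enabling the event-loop watchdog makes the loop send WATCHDOG=1 keep-alive
 * notifications automatically, at an interval safely below the configured
 * timeout:
 *
 *         r = sd_event_set_watchdog(e, true);
 *         if (r < 0)
 *                 return r;
 *         // r == 0 means no watchdog was requested via $WATCHDOG_USEC;
 *         // r > 0 means keep-alive pings are now armed
 */
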
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(ret, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

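/* Usage sketch ("context_free", "on_defer" and "ctx" are hypothetical): a
 * destroy callback ties the lifetime of a userdata object to the lifetime of
 * its event source, so the object is released exactly once, when the source
 * itself is freed:
 *
 *         static void context_free(void *userdata) {
 *                 free(userdata);
 *         }
 *
 *         r = sd_event_add_defer(e, &s, on_defer, ctx);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_source_set_destroy_callback(s, context_free);
 *         if (r < 0)
 *                 return r;
 */
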
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
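
/* Usage sketch (illustrative; "on_defer" and "ctx" are hypothetical): making a
 * source floating hands its ownership to the event loop, which is useful for
 * fire-and-forget sources the caller does not want to track; the source then
 * stays alive until the loop itself is freed:
 *
 *         r = sd_event_add_defer(e, &s, on_defer, ctx);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_source_set_floating(s, true);
 *         if (r < 0)
 *                 return r;
 *
 *         s = sd_event_source_unref(s);   // loop now holds the only reference
 */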