]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/libsystemd/sd-event/sd-event.c
Merge pull request #17185 from yuwata/ethtool-update
[thirdparty/systemd.git] / src / libsystemd / sd-event / sd-event.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
fd38203a
LP
2
3#include <sys/epoll.h>
4#include <sys/timerfd.h>
5#include <sys/wait.h>
6
cde93897 7#include "sd-daemon.h"
07630cea
LP
8#include "sd-event.h"
9#include "sd-id128.h"
10
b5efdb8a 11#include "alloc-util.h"
f8f3f926 12#include "env-util.h"
a137a1c3 13#include "event-source.h"
3ffd4af2 14#include "fd-util.h"
97ef5391 15#include "fs-util.h"
fd38203a 16#include "hashmap.h"
07630cea
LP
17#include "list.h"
18#include "macro.h"
0a970718 19#include "memory-util.h"
f5947a5e 20#include "missing_syscall.h"
07630cea 21#include "prioq.h"
4a0b58c4 22#include "process-util.h"
6e9feda3 23#include "set.h"
24882e06 24#include "signal-util.h"
55cbfaa5 25#include "string-table.h"
07630cea 26#include "string-util.h"
442ac269 27#include "strxcpyx.h"
07630cea 28#include "time-util.h"
fd38203a 29
/* Accuracy applied to a timer event source when the caller passes 0 as accuracy */
#define DEFAULT_ACCURACY_USEC (USEC_PER_MSEC * 250)
fd38203a 31
f8f3f926
LP
32static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38}
39
55cbfaa5
DM
40static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43 [SOURCE_TIME_BOOTTIME] = "bootime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
97ef5391 53 [SOURCE_INOTIFY] = "inotify",
55cbfaa5
DM
54};
55
56DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
/* Evaluates to true if the event source type t is one of the five clock-driven source types */
#define EVENT_SOURCE_IS_TIME(t)                         \
        IN_SET((t),                                     \
               SOURCE_TIME_REALTIME,                    \
               SOURCE_TIME_BOOTTIME,                    \
               SOURCE_TIME_MONOTONIC,                   \
               SOURCE_TIME_REALTIME_ALARM,              \
               SOURCE_TIME_BOOTTIME_ALARM)
6a0f1f6d 59
fd38203a 60struct sd_event {
da7e457c 61 unsigned n_ref;
fd38203a
LP
62
63 int epoll_fd;
cde93897 64 int watchdog_fd;
fd38203a
LP
65
66 Prioq *pending;
67 Prioq *prepare;
c2ba3ad6 68
a8548816 69 /* timerfd_create() only supports these five clocks so far. We
6a0f1f6d
LP
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
a8548816 73 struct clock_data boottime;
6a0f1f6d
LP
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
fd38203a 77
da7e457c
LP
78 usec_t perturb;
79
9da4cb2b
LP
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
fd38203a
LP
82
83 Hashmap *child_sources;
baf76283 84 unsigned n_enabled_child_sources;
fd38203a 85
6e9feda3
LP
86 Set *post_sources;
87
6203e07a 88 Prioq *exit;
fd38203a 89
97ef5391
LP
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
da7e457c 98 pid_t original_pid;
c2ba3ad6 99
60a3b1e1 100 uint64_t iteration;
e475d10c 101 triple_timestamp timestamp;
da7e457c 102 int state;
eaa3cbef 103
6203e07a 104 bool exit_requested:1;
da7e457c 105 bool need_process_child:1;
cde93897 106 bool watchdog:1;
34b87517 107 bool profile_delays:1;
afc6adb5 108
6203e07a
LP
109 int exit_code;
110
afc6adb5
LP
111 pid_t tid;
112 sd_event **default_event_ptr;
cde93897
LP
113
114 usec_t watchdog_last, watchdog_period;
15b38f93
LP
115
116 unsigned n_sources;
a71fe8b8 117
5cddd924
LP
118 struct epoll_event *event_queue;
119 size_t event_queue_allocated;
120
a71fe8b8 121 LIST_HEAD(sd_event_source, sources);
34b87517
VC
122
123 usec_t last_run, last_log;
124 unsigned delays[sizeof(usec_t) * 8];
fd38203a
LP
125};
126
b937d761
NM
127static thread_local sd_event *default_event = NULL;
128
a71fe8b8 129static void source_disconnect(sd_event_source *s);
97ef5391 130static void event_gc_inode_data(sd_event *e, struct inode_data *d);
a71fe8b8 131
b937d761
NM
132static sd_event *event_resolve(sd_event *e) {
133 return e == SD_EVENT_DEFAULT ? default_event : e;
134}
135
fd38203a
LP
136static int pending_prioq_compare(const void *a, const void *b) {
137 const sd_event_source *x = a, *y = b;
9c57a73b 138 int r;
fd38203a
LP
139
140 assert(x->pending);
141 assert(y->pending);
142
baf76283
LP
143 /* Enabled ones first */
144 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
fd38203a 145 return -1;
baf76283 146 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
fd38203a
LP
147 return 1;
148
149 /* Lower priority values first */
9c57a73b
YW
150 r = CMP(x->priority, y->priority);
151 if (r != 0)
152 return r;
fd38203a
LP
153
154 /* Older entries first */
9c57a73b 155 return CMP(x->pending_iteration, y->pending_iteration);
fd38203a
LP
156}
157
158static int prepare_prioq_compare(const void *a, const void *b) {
159 const sd_event_source *x = a, *y = b;
9c57a73b 160 int r;
fd38203a
LP
161
162 assert(x->prepare);
163 assert(y->prepare);
164
8046c457
KK
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
167 return -1;
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
169 return 1;
170
fd38203a
LP
171 /* Move most recently prepared ones last, so that we can stop
172 * preparing as soon as we hit one that has already been
173 * prepared in the current iteration */
9c57a73b
YW
174 r = CMP(x->prepare_iteration, y->prepare_iteration);
175 if (r != 0)
176 return r;
fd38203a 177
fd38203a 178 /* Lower priority values first */
9c57a73b 179 return CMP(x->priority, y->priority);
fd38203a
LP
180}
181
c2ba3ad6 182static int earliest_time_prioq_compare(const void *a, const void *b) {
fd38203a
LP
183 const sd_event_source *x = a, *y = b;
184
6a0f1f6d
LP
185 assert(EVENT_SOURCE_IS_TIME(x->type));
186 assert(x->type == y->type);
fd38203a 187
baf76283
LP
188 /* Enabled ones first */
189 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
fd38203a 190 return -1;
baf76283 191 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
fd38203a
LP
192 return 1;
193
194 /* Move the pending ones to the end */
195 if (!x->pending && y->pending)
196 return -1;
197 if (x->pending && !y->pending)
198 return 1;
199
200 /* Order by time */
9c57a73b 201 return CMP(x->time.next, y->time.next);
fd38203a
LP
202}
203
1bce0ffa
LP
204static usec_t time_event_source_latest(const sd_event_source *s) {
205 return usec_add(s->time.next, s->time.accuracy);
206}
207
c2ba3ad6
LP
208static int latest_time_prioq_compare(const void *a, const void *b) {
209 const sd_event_source *x = a, *y = b;
210
6a0f1f6d
LP
211 assert(EVENT_SOURCE_IS_TIME(x->type));
212 assert(x->type == y->type);
c2ba3ad6 213
baf76283
LP
214 /* Enabled ones first */
215 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
c2ba3ad6 216 return -1;
baf76283 217 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
c2ba3ad6
LP
218 return 1;
219
220 /* Move the pending ones to the end */
221 if (!x->pending && y->pending)
222 return -1;
223 if (x->pending && !y->pending)
224 return 1;
225
226 /* Order by time */
9c57a73b 227 return CMP(time_event_source_latest(x), time_event_source_latest(y));
c2ba3ad6
LP
228}
229
6203e07a 230static int exit_prioq_compare(const void *a, const void *b) {
da7e457c
LP
231 const sd_event_source *x = a, *y = b;
232
6203e07a
LP
233 assert(x->type == SOURCE_EXIT);
234 assert(y->type == SOURCE_EXIT);
da7e457c 235
baf76283
LP
236 /* Enabled ones first */
237 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
da7e457c 238 return -1;
baf76283 239 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
da7e457c
LP
240 return 1;
241
242 /* Lower priority values first */
6dd91b36 243 return CMP(x->priority, y->priority);
da7e457c
LP
244}
245
6a0f1f6d
LP
246static void free_clock_data(struct clock_data *d) {
247 assert(d);
9da4cb2b 248 assert(d->wakeup == WAKEUP_CLOCK_DATA);
6a0f1f6d
LP
249
250 safe_close(d->fd);
251 prioq_free(d->earliest);
252 prioq_free(d->latest);
253}
254
8301aa0b 255static sd_event *event_free(sd_event *e) {
a71fe8b8
LP
256 sd_event_source *s;
257
fd38203a 258 assert(e);
a71fe8b8
LP
259
260 while ((s = e->sources)) {
261 assert(s->floating);
262 source_disconnect(s);
263 sd_event_source_unref(s);
264 }
265
15b38f93 266 assert(e->n_sources == 0);
fd38203a 267
afc6adb5
LP
268 if (e->default_event_ptr)
269 *(e->default_event_ptr) = NULL;
270
03e334a1 271 safe_close(e->epoll_fd);
03e334a1 272 safe_close(e->watchdog_fd);
cde93897 273
6a0f1f6d 274 free_clock_data(&e->realtime);
a8548816 275 free_clock_data(&e->boottime);
6a0f1f6d
LP
276 free_clock_data(&e->monotonic);
277 free_clock_data(&e->realtime_alarm);
278 free_clock_data(&e->boottime_alarm);
279
fd38203a
LP
280 prioq_free(e->pending);
281 prioq_free(e->prepare);
6203e07a 282 prioq_free(e->exit);
fd38203a
LP
283
284 free(e->signal_sources);
9da4cb2b 285 hashmap_free(e->signal_data);
fd38203a 286
97ef5391
LP
287 hashmap_free(e->inotify_data);
288
fd38203a 289 hashmap_free(e->child_sources);
6e9feda3 290 set_free(e->post_sources);
8301aa0b 291
5cddd924
LP
292 free(e->event_queue);
293
8301aa0b 294 return mfree(e);
fd38203a
LP
295}
296
f7262a9f 297_public_ int sd_event_new(sd_event** ret) {
fd38203a
LP
298 sd_event *e;
299 int r;
300
305f78bf 301 assert_return(ret, -EINVAL);
fd38203a 302
d08eb1fa 303 e = new(sd_event, 1);
fd38203a
LP
304 if (!e)
305 return -ENOMEM;
306
d08eb1fa
LP
307 *e = (sd_event) {
308 .n_ref = 1,
309 .epoll_fd = -1,
310 .watchdog_fd = -1,
311 .realtime.wakeup = WAKEUP_CLOCK_DATA,
312 .realtime.fd = -1,
313 .realtime.next = USEC_INFINITY,
314 .boottime.wakeup = WAKEUP_CLOCK_DATA,
315 .boottime.fd = -1,
316 .boottime.next = USEC_INFINITY,
317 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
318 .monotonic.fd = -1,
319 .monotonic.next = USEC_INFINITY,
320 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
321 .realtime_alarm.fd = -1,
322 .realtime_alarm.next = USEC_INFINITY,
323 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
324 .boottime_alarm.fd = -1,
325 .boottime_alarm.next = USEC_INFINITY,
326 .perturb = USEC_INFINITY,
327 .original_pid = getpid_cached(),
328 };
fd38203a 329
c983e776
EV
330 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
331 if (r < 0)
fd38203a 332 goto fail;
fd38203a
LP
333
334 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
335 if (e->epoll_fd < 0) {
336 r = -errno;
337 goto fail;
338 }
339
7fe2903c
LP
340 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
341
34b87517 342 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
34a6843d 343 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
34b87517
VC
344 e->profile_delays = true;
345 }
346
fd38203a
LP
347 *ret = e;
348 return 0;
349
350fail:
351 event_free(e);
352 return r;
353}
354
8301aa0b 355DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
fd38203a 356
afd15bbb
ZJS
357_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
358 if (s)
359 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
360 return sd_event_source_unref(s);
361}
362
eaa3cbef
LP
363static bool event_pid_changed(sd_event *e) {
364 assert(e);
365
a2360a46 366 /* We don't support people creating an event loop and keeping
eaa3cbef
LP
367 * it around over a fork(). Let's complain. */
368
df0ff127 369 return e->original_pid != getpid_cached();
eaa3cbef
LP
370}
371
366e6411 372static void source_io_unregister(sd_event_source *s) {
fd38203a
LP
373 assert(s);
374 assert(s->type == SOURCE_IO);
375
f6806734 376 if (event_pid_changed(s->event))
366e6411 377 return;
f6806734 378
fd38203a 379 if (!s->io.registered)
366e6411 380 return;
fd38203a 381
d1cf2023 382 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
55cbfaa5
DM
383 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
384 strna(s->description), event_source_type_to_string(s->type));
fd38203a
LP
385
386 s->io.registered = false;
fd38203a
LP
387}
388
305f78bf
LP
389static int source_io_register(
390 sd_event_source *s,
391 int enabled,
392 uint32_t events) {
393
fd38203a
LP
394 assert(s);
395 assert(s->type == SOURCE_IO);
baf76283 396 assert(enabled != SD_EVENT_OFF);
fd38203a 397
1eac7948 398 struct epoll_event ev = {
a82f89aa
LP
399 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
400 .data.ptr = s,
401 };
fd38203a 402
15c689d7 403 if (epoll_ctl(s->event->epoll_fd,
1eac7948
ZJS
404 s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
405 s->io.fd,
15c689d7 406 &ev) < 0)
fd38203a
LP
407 return -errno;
408
409 s->io.registered = true;
410
411 return 0;
412}
413
f8f3f926
LP
414static void source_child_pidfd_unregister(sd_event_source *s) {
415 assert(s);
416 assert(s->type == SOURCE_CHILD);
417
418 if (event_pid_changed(s->event))
419 return;
420
421 if (!s->child.registered)
422 return;
423
424 if (EVENT_SOURCE_WATCH_PIDFD(s))
425 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
426 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
427 strna(s->description), event_source_type_to_string(s->type));
428
429 s->child.registered = false;
430}
431
432static int source_child_pidfd_register(sd_event_source *s, int enabled) {
433 int r;
434
435 assert(s);
436 assert(s->type == SOURCE_CHILD);
437 assert(enabled != SD_EVENT_OFF);
438
439 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1eac7948 440 struct epoll_event ev = {
f8f3f926
LP
441 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
442 .data.ptr = s,
443 };
444
445 if (s->child.registered)
446 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
447 else
448 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
449 if (r < 0)
450 return -errno;
451 }
452
453 s->child.registered = true;
454 return 0;
455}
456
6a0f1f6d
LP
457static clockid_t event_source_type_to_clock(EventSourceType t) {
458
459 switch (t) {
460
461 case SOURCE_TIME_REALTIME:
462 return CLOCK_REALTIME;
463
a8548816
TG
464 case SOURCE_TIME_BOOTTIME:
465 return CLOCK_BOOTTIME;
466
6a0f1f6d
LP
467 case SOURCE_TIME_MONOTONIC:
468 return CLOCK_MONOTONIC;
469
470 case SOURCE_TIME_REALTIME_ALARM:
471 return CLOCK_REALTIME_ALARM;
472
473 case SOURCE_TIME_BOOTTIME_ALARM:
474 return CLOCK_BOOTTIME_ALARM;
475
476 default:
477 return (clockid_t) -1;
478 }
479}
480
481static EventSourceType clock_to_event_source_type(clockid_t clock) {
482
483 switch (clock) {
484
485 case CLOCK_REALTIME:
486 return SOURCE_TIME_REALTIME;
487
a8548816
TG
488 case CLOCK_BOOTTIME:
489 return SOURCE_TIME_BOOTTIME;
490
6a0f1f6d
LP
491 case CLOCK_MONOTONIC:
492 return SOURCE_TIME_MONOTONIC;
493
494 case CLOCK_REALTIME_ALARM:
495 return SOURCE_TIME_REALTIME_ALARM;
496
497 case CLOCK_BOOTTIME_ALARM:
498 return SOURCE_TIME_BOOTTIME_ALARM;
499
500 default:
501 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
502 }
503}
504
505static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
506 assert(e);
507
508 switch (t) {
509
510 case SOURCE_TIME_REALTIME:
511 return &e->realtime;
512
a8548816
TG
513 case SOURCE_TIME_BOOTTIME:
514 return &e->boottime;
515
6a0f1f6d
LP
516 case SOURCE_TIME_MONOTONIC:
517 return &e->monotonic;
518
519 case SOURCE_TIME_REALTIME_ALARM:
520 return &e->realtime_alarm;
521
522 case SOURCE_TIME_BOOTTIME_ALARM:
523 return &e->boottime_alarm;
524
525 default:
526 return NULL;
527 }
528}
529
3e4eb8e7
YW
530static void event_free_signal_data(sd_event *e, struct signal_data *d) {
531 assert(e);
532
533 if (!d)
534 return;
535
536 hashmap_remove(e->signal_data, &d->priority);
537 safe_close(d->fd);
538 free(d);
539}
540
9da4cb2b
LP
541static int event_make_signal_data(
542 sd_event *e,
543 int sig,
544 struct signal_data **ret) {
4807d2d0 545
9da4cb2b
LP
546 struct signal_data *d;
547 bool added = false;
548 sigset_t ss_copy;
549 int64_t priority;
f95387cd
ZJS
550 int r;
551
552 assert(e);
553
f6806734 554 if (event_pid_changed(e))
9da4cb2b 555 return -ECHILD;
f6806734 556
9da4cb2b
LP
557 if (e->signal_sources && e->signal_sources[sig])
558 priority = e->signal_sources[sig]->priority;
559 else
de05913d 560 priority = SD_EVENT_PRIORITY_NORMAL;
f95387cd 561
9da4cb2b
LP
562 d = hashmap_get(e->signal_data, &priority);
563 if (d) {
564 if (sigismember(&d->sigset, sig) > 0) {
565 if (ret)
566 *ret = d;
567 return 0;
568 }
569 } else {
570 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
571 if (r < 0)
572 return r;
573
d08eb1fa 574 d = new(struct signal_data, 1);
9da4cb2b
LP
575 if (!d)
576 return -ENOMEM;
577
d08eb1fa
LP
578 *d = (struct signal_data) {
579 .wakeup = WAKEUP_SIGNAL_DATA,
580 .fd = -1,
581 .priority = priority,
582 };
9da4cb2b
LP
583
584 r = hashmap_put(e->signal_data, &d->priority, d);
90f604d1
ZJS
585 if (r < 0) {
586 free(d);
9da4cb2b 587 return r;
90f604d1 588 }
f95387cd 589
9da4cb2b
LP
590 added = true;
591 }
592
593 ss_copy = d->sigset;
594 assert_se(sigaddset(&ss_copy, sig) >= 0);
595
596 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
597 if (r < 0) {
598 r = -errno;
599 goto fail;
600 }
601
602 d->sigset = ss_copy;
f95387cd 603
9da4cb2b
LP
604 if (d->fd >= 0) {
605 if (ret)
606 *ret = d;
f95387cd 607 return 0;
9da4cb2b
LP
608 }
609
7fe2903c 610 d->fd = fd_move_above_stdio(r);
f95387cd 611
1eac7948 612 struct epoll_event ev = {
a82f89aa
LP
613 .events = EPOLLIN,
614 .data.ptr = d,
615 };
f95387cd 616
15c689d7 617 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
9da4cb2b
LP
618 r = -errno;
619 goto fail;
f95387cd
ZJS
620 }
621
9da4cb2b
LP
622 if (ret)
623 *ret = d;
624
f95387cd 625 return 0;
9da4cb2b
LP
626
627fail:
3e4eb8e7
YW
628 if (added)
629 event_free_signal_data(e, d);
9da4cb2b
LP
630
631 return r;
632}
633
634static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
635 assert(e);
636 assert(d);
637
638 /* Turns off the specified signal in the signal data
639 * object. If the signal mask of the object becomes empty that
640 * way removes it. */
641
642 if (sigismember(&d->sigset, sig) == 0)
643 return;
644
645 assert_se(sigdelset(&d->sigset, sig) >= 0);
646
647 if (sigisemptyset(&d->sigset)) {
9da4cb2b 648 /* If all the mask is all-zero we can get rid of the structure */
3e4eb8e7 649 event_free_signal_data(e, d);
9da4cb2b
LP
650 return;
651 }
652
653 assert(d->fd >= 0);
654
655 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
656 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
657}
658
659static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
660 struct signal_data *d;
661 static const int64_t zero_priority = 0;
662
663 assert(e);
664
f8f3f926
LP
665 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
666 * and possibly drop the signalfd for it. */
9da4cb2b
LP
667
668 if (sig == SIGCHLD &&
669 e->n_enabled_child_sources > 0)
670 return;
671
672 if (e->signal_sources &&
673 e->signal_sources[sig] &&
674 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
675 return;
676
677 /*
678 * The specified signal might be enabled in three different queues:
679 *
680 * 1) the one that belongs to the priority passed (if it is non-NULL)
681 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
682 * 3) the 0 priority (to cover the SIGCHLD case)
683 *
684 * Hence, let's remove it from all three here.
685 */
686
687 if (priority) {
688 d = hashmap_get(e->signal_data, priority);
689 if (d)
690 event_unmask_signal_data(e, d, sig);
691 }
692
693 if (e->signal_sources && e->signal_sources[sig]) {
694 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
695 if (d)
696 event_unmask_signal_data(e, d, sig);
697 }
698
699 d = hashmap_get(e->signal_data, &zero_priority);
700 if (d)
701 event_unmask_signal_data(e, d, sig);
f95387cd
ZJS
702}
703
a71fe8b8
LP
704static void source_disconnect(sd_event_source *s) {
705 sd_event *event;
706
fd38203a
LP
707 assert(s);
708
a71fe8b8
LP
709 if (!s->event)
710 return;
15b38f93 711
a71fe8b8 712 assert(s->event->n_sources > 0);
fd38203a 713
a71fe8b8 714 switch (s->type) {
fd38203a 715
a71fe8b8
LP
716 case SOURCE_IO:
717 if (s->io.fd >= 0)
718 source_io_unregister(s);
fd38203a 719
a71fe8b8 720 break;
6a0f1f6d 721
a71fe8b8 722 case SOURCE_TIME_REALTIME:
a8548816 723 case SOURCE_TIME_BOOTTIME:
a71fe8b8
LP
724 case SOURCE_TIME_MONOTONIC:
725 case SOURCE_TIME_REALTIME_ALARM:
726 case SOURCE_TIME_BOOTTIME_ALARM: {
727 struct clock_data *d;
fd38203a 728
a71fe8b8
LP
729 d = event_get_clock_data(s->event, s->type);
730 assert(d);
731
732 prioq_remove(d->earliest, s, &s->time.earliest_index);
733 prioq_remove(d->latest, s, &s->time.latest_index);
212bbb17 734 d->needs_rearm = true;
a71fe8b8
LP
735 break;
736 }
737
738 case SOURCE_SIGNAL:
739 if (s->signal.sig > 0) {
9da4cb2b 740
a71fe8b8
LP
741 if (s->event->signal_sources)
742 s->event->signal_sources[s->signal.sig] = NULL;
4807d2d0 743
9da4cb2b 744 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
6a0f1f6d 745 }
fd38203a 746
a71fe8b8 747 break;
fd38203a 748
a71fe8b8
LP
749 case SOURCE_CHILD:
750 if (s->child.pid > 0) {
751 if (s->enabled != SD_EVENT_OFF) {
752 assert(s->event->n_enabled_child_sources > 0);
753 s->event->n_enabled_child_sources--;
4807d2d0 754 }
fd38203a 755
4a0b58c4 756 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
a71fe8b8 757 }
fd38203a 758
f8f3f926
LP
759 if (EVENT_SOURCE_WATCH_PIDFD(s))
760 source_child_pidfd_unregister(s);
761 else
762 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
763
a71fe8b8 764 break;
fd38203a 765
a71fe8b8
LP
766 case SOURCE_DEFER:
767 /* nothing */
768 break;
fd38203a 769
a71fe8b8
LP
770 case SOURCE_POST:
771 set_remove(s->event->post_sources, s);
772 break;
da7e457c 773
a71fe8b8
LP
774 case SOURCE_EXIT:
775 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
776 break;
0eb2e0e3 777
97ef5391
LP
778 case SOURCE_INOTIFY: {
779 struct inode_data *inode_data;
780
781 inode_data = s->inotify.inode_data;
782 if (inode_data) {
783 struct inotify_data *inotify_data;
784 assert_se(inotify_data = inode_data->inotify_data);
785
786 /* Detach this event source from the inode object */
787 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
788 s->inotify.inode_data = NULL;
789
790 if (s->pending) {
791 assert(inotify_data->n_pending > 0);
792 inotify_data->n_pending--;
793 }
794
795 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
796 * continued to being watched. That's because inotify doesn't really have an API for that: we
797 * can only change watch masks with access to the original inode either by fd or by path. But
798 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
f21f31b2 799 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
97ef5391
LP
800 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
801 * there), but given the need for open_by_handle_at() which is privileged and not universally
802 * available this would be quite an incomplete solution. Hence we go the other way, leave the
803 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
804 * anymore after reception. Yes, this sucks, but … Linux … */
805
806 /* Maybe release the inode data (and its inotify) */
807 event_gc_inode_data(s->event, inode_data);
808 }
809
810 break;
811 }
812
a71fe8b8
LP
813 default:
814 assert_not_reached("Wut? I shouldn't exist.");
815 }
6e9feda3 816
a71fe8b8
LP
817 if (s->pending)
818 prioq_remove(s->event->pending, s, &s->pending_index);
9d3e3aa5 819
a71fe8b8
LP
820 if (s->prepare)
821 prioq_remove(s->event->prepare, s, &s->prepare_index);
fd38203a 822
e514aa1e 823 event = TAKE_PTR(s->event);
a71fe8b8
LP
824 LIST_REMOVE(sources, event->sources, s);
825 event->n_sources--;
fd38203a 826
f5982559
LP
827 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
828 * pidfd associated with this event source, which we'll do only on source_free(). */
829
a71fe8b8
LP
830 if (!s->floating)
831 sd_event_unref(event);
832}
833
834static void source_free(sd_event_source *s) {
835 assert(s);
fd38203a 836
a71fe8b8 837 source_disconnect(s);
ab93297c
NM
838
839 if (s->type == SOURCE_IO && s->io.owned)
15723a1d
LP
840 s->io.fd = safe_close(s->io.fd);
841
f8f3f926
LP
842 if (s->type == SOURCE_CHILD) {
843 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
844
845 if (s->child.process_owned) {
846
847 if (!s->child.exited) {
848 bool sent = false;
849
850 if (s->child.pidfd >= 0) {
851 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
852 if (errno == ESRCH) /* Already dead */
853 sent = true;
854 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
855 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
856 s->child.pid);
857 } else
858 sent = true;
859 }
860
861 if (!sent)
862 if (kill(s->child.pid, SIGKILL) < 0)
863 if (errno != ESRCH) /* Already dead */
864 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
865 s->child.pid);
866 }
867
868 if (!s->child.waited) {
869 siginfo_t si = {};
870
871 /* Reap the child if we can */
872 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
873 }
874 }
875
876 if (s->child.pidfd_owned)
877 s->child.pidfd = safe_close(s->child.pidfd);
878 }
879
15723a1d
LP
880 if (s->destroy_callback)
881 s->destroy_callback(s->userdata);
ab93297c 882
356779df 883 free(s->description);
fd38203a
LP
884 free(s);
885}
8c75fe17 886DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
fd38203a
LP
887
888static int source_set_pending(sd_event_source *s, bool b) {
889 int r;
890
891 assert(s);
6203e07a 892 assert(s->type != SOURCE_EXIT);
fd38203a
LP
893
894 if (s->pending == b)
895 return 0;
896
897 s->pending = b;
898
899 if (b) {
900 s->pending_iteration = s->event->iteration;
901
902 r = prioq_put(s->event->pending, s, &s->pending_index);
903 if (r < 0) {
904 s->pending = false;
905 return r;
906 }
907 } else
908 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
909
6a0f1f6d
LP
910 if (EVENT_SOURCE_IS_TIME(s->type)) {
911 struct clock_data *d;
912
913 d = event_get_clock_data(s->event, s->type);
914 assert(d);
915
916 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
917 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 918 d->needs_rearm = true;
2576a19e
LP
919 }
920
9da4cb2b
LP
921 if (s->type == SOURCE_SIGNAL && !b) {
922 struct signal_data *d;
923
924 d = hashmap_get(s->event->signal_data, &s->priority);
925 if (d && d->current == s)
926 d->current = NULL;
927 }
928
97ef5391
LP
929 if (s->type == SOURCE_INOTIFY) {
930
931 assert(s->inotify.inode_data);
932 assert(s->inotify.inode_data->inotify_data);
933
934 if (b)
935 s->inotify.inode_data->inotify_data->n_pending ++;
936 else {
937 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
938 s->inotify.inode_data->inotify_data->n_pending --;
939 }
940 }
941
fd38203a
LP
942 return 0;
943}
944
a71fe8b8 945static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
fd38203a
LP
946 sd_event_source *s;
947
948 assert(e);
949
d08eb1fa 950 s = new(sd_event_source, 1);
fd38203a
LP
951 if (!s)
952 return NULL;
953
d08eb1fa
LP
954 *s = (struct sd_event_source) {
955 .n_ref = 1,
956 .event = e,
957 .floating = floating,
958 .type = type,
959 .pending_index = PRIOQ_IDX_NULL,
960 .prepare_index = PRIOQ_IDX_NULL,
961 };
a71fe8b8
LP
962
963 if (!floating)
964 sd_event_ref(e);
fd38203a 965
a71fe8b8 966 LIST_PREPEND(sources, e->sources, s);
313cefa1 967 e->n_sources++;
15b38f93 968
fd38203a
LP
969 return s;
970}
971
b9350e70
LP
972static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
973 assert(s);
974
975 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
976}
977
f7262a9f 978_public_ int sd_event_add_io(
fd38203a 979 sd_event *e,
151b9b96 980 sd_event_source **ret,
fd38203a
LP
981 int fd,
982 uint32_t events,
718db961 983 sd_event_io_handler_t callback,
151b9b96 984 void *userdata) {
fd38203a 985
ec766a51 986 _cleanup_(source_freep) sd_event_source *s = NULL;
fd38203a
LP
987 int r;
988
305f78bf 989 assert_return(e, -EINVAL);
b937d761 990 assert_return(e = event_resolve(e), -ENOPKG);
8ac43fee 991 assert_return(fd >= 0, -EBADF);
2a16a986 992 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 993 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 994 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 995
b9350e70
LP
996 if (!callback)
997 callback = io_exit_callback;
998
a71fe8b8 999 s = source_new(e, !ret, SOURCE_IO);
fd38203a
LP
1000 if (!s)
1001 return -ENOMEM;
1002
9da4cb2b 1003 s->wakeup = WAKEUP_EVENT_SOURCE;
fd38203a
LP
1004 s->io.fd = fd;
1005 s->io.events = events;
1006 s->io.callback = callback;
1007 s->userdata = userdata;
baf76283 1008 s->enabled = SD_EVENT_ON;
fd38203a 1009
baf76283 1010 r = source_io_register(s, s->enabled, events);
ec766a51 1011 if (r < 0)
050f74f2 1012 return r;
fd38203a 1013
a71fe8b8
LP
1014 if (ret)
1015 *ret = s;
ec766a51 1016 TAKE_PTR(s);
a71fe8b8 1017
fd38203a
LP
1018 return 0;
1019}
1020
52444dc4
LP
1021static void initialize_perturb(sd_event *e) {
1022 sd_id128_t bootid = {};
1023
1024 /* When we sleep for longer, we try to realign the wakeup to
f21f31b2 1025 the same time within each minute/second/250ms, so that
52444dc4
LP
1026 events all across the system can be coalesced into a single
1027 CPU wakeup. However, let's take some system-specific
1028 randomness for this value, so that in a network of systems
1029 with synced clocks timer events are distributed a
1030 bit. Here, we calculate a perturbation usec offset from the
1031 boot ID. */
1032
3a43da28 1033 if (_likely_(e->perturb != USEC_INFINITY))
52444dc4
LP
1034 return;
1035
1036 if (sd_id128_get_boot(&bootid) >= 0)
1037 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1038}
1039
fd38203a
LP
1040static int event_setup_timer_fd(
1041 sd_event *e,
6a0f1f6d
LP
1042 struct clock_data *d,
1043 clockid_t clock) {
fd38203a 1044
fd38203a 1045 assert(e);
6a0f1f6d 1046 assert(d);
fd38203a 1047
6a0f1f6d 1048 if (_likely_(d->fd >= 0))
fd38203a
LP
1049 return 0;
1050
b44d87e2 1051 _cleanup_close_ int fd = -1;
b44d87e2 1052
6a0f1f6d 1053 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
fd38203a
LP
1054 if (fd < 0)
1055 return -errno;
1056
7fe2903c
LP
1057 fd = fd_move_above_stdio(fd);
1058
1eac7948 1059 struct epoll_event ev = {
a82f89aa
LP
1060 .events = EPOLLIN,
1061 .data.ptr = d,
1062 };
fd38203a 1063
15c689d7 1064 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
fd38203a 1065 return -errno;
fd38203a 1066
b44d87e2 1067 d->fd = TAKE_FD(fd);
fd38203a
LP
1068 return 0;
1069}
1070
c4f1aff2
TG
1071static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1072 assert(s);
1073
1074 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1075}
1076
6a0f1f6d 1077_public_ int sd_event_add_time(
fd38203a 1078 sd_event *e,
151b9b96 1079 sd_event_source **ret,
6a0f1f6d 1080 clockid_t clock,
fd38203a 1081 uint64_t usec,
c2ba3ad6 1082 uint64_t accuracy,
718db961 1083 sd_event_time_handler_t callback,
151b9b96 1084 void *userdata) {
fd38203a 1085
6a0f1f6d 1086 EventSourceType type;
ec766a51 1087 _cleanup_(source_freep) sd_event_source *s = NULL;
6a0f1f6d 1088 struct clock_data *d;
fd38203a
LP
1089 int r;
1090
305f78bf 1091 assert_return(e, -EINVAL);
b937d761 1092 assert_return(e = event_resolve(e), -ENOPKG);
305f78bf 1093 assert_return(accuracy != (uint64_t) -1, -EINVAL);
da7e457c 1094 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 1095 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 1096
e475d10c
LP
1097 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1098 return -EOPNOTSUPP;
1099
1100 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1101 if (type < 0)
3411372e
LP
1102 return -EOPNOTSUPP;
1103
c4f1aff2
TG
1104 if (!callback)
1105 callback = time_exit_callback;
1106
6a0f1f6d
LP
1107 d = event_get_clock_data(e, type);
1108 assert(d);
c2ba3ad6 1109
c983e776
EV
1110 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1111 if (r < 0)
1112 return r;
fd38203a 1113
c983e776
EV
1114 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1115 if (r < 0)
1116 return r;
fd38203a 1117
6a0f1f6d
LP
1118 if (d->fd < 0) {
1119 r = event_setup_timer_fd(e, d, clock);
fd38203a
LP
1120 if (r < 0)
1121 return r;
1122 }
1123
a71fe8b8 1124 s = source_new(e, !ret, type);
fd38203a
LP
1125 if (!s)
1126 return -ENOMEM;
1127
1128 s->time.next = usec;
c2ba3ad6 1129 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
fd38203a 1130 s->time.callback = callback;
da7e457c 1131 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
fd38203a 1132 s->userdata = userdata;
baf76283 1133 s->enabled = SD_EVENT_ONESHOT;
fd38203a 1134
e07bbb7c
TG
1135 d->needs_rearm = true;
1136
6a0f1f6d 1137 r = prioq_put(d->earliest, s, &s->time.earliest_index);
c2ba3ad6 1138 if (r < 0)
ec766a51 1139 return r;
c2ba3ad6 1140
6a0f1f6d 1141 r = prioq_put(d->latest, s, &s->time.latest_index);
c2ba3ad6 1142 if (r < 0)
ec766a51 1143 return r;
fd38203a 1144
a71fe8b8
LP
1145 if (ret)
1146 *ret = s;
ec766a51 1147 TAKE_PTR(s);
a71fe8b8 1148
fd38203a
LP
1149 return 0;
1150}
1151
d6a83dc4
LP
1152_public_ int sd_event_add_time_relative(
1153 sd_event *e,
1154 sd_event_source **ret,
1155 clockid_t clock,
1156 uint64_t usec,
1157 uint64_t accuracy,
1158 sd_event_time_handler_t callback,
1159 void *userdata) {
1160
1161 usec_t t;
1162 int r;
1163
1164 /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
1165 * checks for overflow. */
1166
1167 r = sd_event_now(e, clock, &t);
1168 if (r < 0)
1169 return r;
1170
1171 if (usec >= USEC_INFINITY - t)
1172 return -EOVERFLOW;
1173
1174 return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
1175}
1176
59bc1fd7
LP
1177static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1178 assert(s);
1179
1180 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1181}
1182
f7262a9f 1183_public_ int sd_event_add_signal(
305f78bf 1184 sd_event *e,
151b9b96 1185 sd_event_source **ret,
305f78bf 1186 int sig,
718db961 1187 sd_event_signal_handler_t callback,
151b9b96 1188 void *userdata) {
305f78bf 1189
ec766a51 1190 _cleanup_(source_freep) sd_event_source *s = NULL;
9da4cb2b 1191 struct signal_data *d;
fd38203a
LP
1192 int r;
1193
305f78bf 1194 assert_return(e, -EINVAL);
b937d761 1195 assert_return(e = event_resolve(e), -ENOPKG);
6eb7c172 1196 assert_return(SIGNAL_VALID(sig), -EINVAL);
da7e457c 1197 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 1198 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 1199
59bc1fd7
LP
1200 if (!callback)
1201 callback = signal_exit_callback;
1202
d1b75241
LP
1203 r = signal_is_blocked(sig);
1204 if (r < 0)
1205 return r;
1206 if (r == 0)
3022d74b
LP
1207 return -EBUSY;
1208
fd38203a
LP
1209 if (!e->signal_sources) {
1210 e->signal_sources = new0(sd_event_source*, _NSIG);
1211 if (!e->signal_sources)
1212 return -ENOMEM;
1213 } else if (e->signal_sources[sig])
1214 return -EBUSY;
1215
a71fe8b8 1216 s = source_new(e, !ret, SOURCE_SIGNAL);
fd38203a
LP
1217 if (!s)
1218 return -ENOMEM;
1219
1220 s->signal.sig = sig;
1221 s->signal.callback = callback;
1222 s->userdata = userdata;
baf76283 1223 s->enabled = SD_EVENT_ON;
fd38203a
LP
1224
1225 e->signal_sources[sig] = s;
fd38203a 1226
9da4cb2b 1227 r = event_make_signal_data(e, sig, &d);
ec766a51 1228 if (r < 0)
9da4cb2b 1229 return r;
fd38203a 1230
f1f00dbb
LP
1231 /* Use the signal name as description for the event source by default */
1232 (void) sd_event_source_set_description(s, signal_to_string(sig));
1233
a71fe8b8
LP
1234 if (ret)
1235 *ret = s;
ec766a51 1236 TAKE_PTR(s);
a71fe8b8 1237
fd38203a
LP
1238 return 0;
1239}
1240
b9350e70
LP
1241static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
1242 assert(s);
1243
1244 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1245}
1246
f8f3f926
LP
/* Tells whether pidfds shall be used for watching children. Controlled via the $SYSTEMD_PIDFD environment
 * variable; anything but an explicit "false" result (i.e. 0) enables them. Mostly a debugging knob:
 * test-event.c uses it to run the event loop tests once with and once without pidfd. */
static bool shall_use_pidfd(void) {
        int v;

        v = getenv_bool_secure("SYSTEMD_PIDFD");
        return v != 0;
}
1251
f7262a9f 1252_public_ int sd_event_add_child(
305f78bf 1253 sd_event *e,
151b9b96 1254 sd_event_source **ret,
305f78bf
LP
1255 pid_t pid,
1256 int options,
718db961 1257 sd_event_child_handler_t callback,
151b9b96 1258 void *userdata) {
305f78bf 1259
ec766a51 1260 _cleanup_(source_freep) sd_event_source *s = NULL;
fd38203a
LP
1261 int r;
1262
305f78bf 1263 assert_return(e, -EINVAL);
b937d761 1264 assert_return(e = event_resolve(e), -ENOPKG);
305f78bf
LP
1265 assert_return(pid > 1, -EINVAL);
1266 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1267 assert_return(options != 0, -EINVAL);
da7e457c 1268 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 1269 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 1270
b9350e70
LP
1271 if (!callback)
1272 callback = child_exit_callback;
1273
ee880b37
LP
1274 if (e->n_enabled_child_sources == 0) {
1275 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1276 * for compatibility with pre-pidfd and because we don't want the reap the child processes
1277 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1278 * take effect.
1279 *
1280 * (As an optimization we only do this check on the first child event source created.) */
1281 r = signal_is_blocked(SIGCHLD);
1282 if (r < 0)
1283 return r;
1284 if (r == 0)
1285 return -EBUSY;
1286 }
1287
d5099efc 1288 r = hashmap_ensure_allocated(&e->child_sources, NULL);
fd38203a
LP
1289 if (r < 0)
1290 return r;
1291
4a0b58c4 1292 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
fd38203a
LP
1293 return -EBUSY;
1294
a71fe8b8 1295 s = source_new(e, !ret, SOURCE_CHILD);
fd38203a
LP
1296 if (!s)
1297 return -ENOMEM;
1298
f8f3f926 1299 s->wakeup = WAKEUP_EVENT_SOURCE;
fd38203a
LP
1300 s->child.pid = pid;
1301 s->child.options = options;
1302 s->child.callback = callback;
1303 s->userdata = userdata;
baf76283 1304 s->enabled = SD_EVENT_ONESHOT;
fd38203a 1305
f8f3f926
LP
1306 /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
1307 * pin the PID, and make regular waitid() handling race-free. */
1308
1309 if (shall_use_pidfd()) {
1310 s->child.pidfd = pidfd_open(s->child.pid, 0);
1311 if (s->child.pidfd < 0) {
1312 /* Propagate errors unless the syscall is not supported or blocked */
1313 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1314 return -errno;
1315 } else
1316 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1317 } else
1318 s->child.pidfd = -1;
1319
4a0b58c4 1320 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
ec766a51 1321 if (r < 0)
fd38203a 1322 return r;
fd38203a 1323
313cefa1 1324 e->n_enabled_child_sources++;
fd38203a 1325
f8f3f926
LP
1326 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1327 /* We have a pidfd and we only want to watch for exit */
1328
1329 r = source_child_pidfd_register(s, s->enabled);
1330 if (r < 0) {
1331 e->n_enabled_child_sources--;
1332 return r;
1333 }
1334 } else {
1335 /* We have no pidfd or we shall wait for some other event than WEXITED */
fd38203a 1336
f8f3f926
LP
1337 r = event_make_signal_data(e, SIGCHLD, NULL);
1338 if (r < 0) {
1339 e->n_enabled_child_sources--;
1340 return r;
1341 }
1342
1343 e->need_process_child = true;
1344 }
c2ba3ad6 1345
a71fe8b8
LP
1346 if (ret)
1347 *ret = s;
f8f3f926 1348
ec766a51 1349 TAKE_PTR(s);
f8f3f926
LP
1350 return 0;
1351}
1352
1353_public_ int sd_event_add_child_pidfd(
1354 sd_event *e,
1355 sd_event_source **ret,
1356 int pidfd,
1357 int options,
1358 sd_event_child_handler_t callback,
1359 void *userdata) {
1360
1361
1362 _cleanup_(source_freep) sd_event_source *s = NULL;
1363 pid_t pid;
1364 int r;
1365
1366 assert_return(e, -EINVAL);
1367 assert_return(e = event_resolve(e), -ENOPKG);
1368 assert_return(pidfd >= 0, -EBADF);
1369 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1370 assert_return(options != 0, -EINVAL);
f8f3f926
LP
1371 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1372 assert_return(!event_pid_changed(e), -ECHILD);
1373
b9350e70
LP
1374 if (!callback)
1375 callback = child_exit_callback;
1376
ee880b37
LP
1377 if (e->n_enabled_child_sources == 0) {
1378 r = signal_is_blocked(SIGCHLD);
1379 if (r < 0)
1380 return r;
1381 if (r == 0)
1382 return -EBUSY;
1383 }
1384
f8f3f926
LP
1385 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1386 if (r < 0)
1387 return r;
1388
1389 r = pidfd_get_pid(pidfd, &pid);
1390 if (r < 0)
1391 return r;
1392
1393 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1394 return -EBUSY;
1395
1396 s = source_new(e, !ret, SOURCE_CHILD);
1397 if (!s)
1398 return -ENOMEM;
1399
1400 s->wakeup = WAKEUP_EVENT_SOURCE;
1401 s->child.pidfd = pidfd;
1402 s->child.pid = pid;
1403 s->child.options = options;
1404 s->child.callback = callback;
1405 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1406 s->userdata = userdata;
1407 s->enabled = SD_EVENT_ONESHOT;
1408
1409 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1410 if (r < 0)
1411 return r;
1412
1413 e->n_enabled_child_sources++;
1414
1415 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1416 /* We only want to watch for WEXITED */
1417
1418 r = source_child_pidfd_register(s, s->enabled);
1419 if (r < 0) {
1420 e->n_enabled_child_sources--;
1421 return r;
1422 }
1423 } else {
1424 /* We shall wait for some other event than WEXITED */
1425
1426 r = event_make_signal_data(e, SIGCHLD, NULL);
1427 if (r < 0) {
1428 e->n_enabled_child_sources--;
1429 return r;
1430 }
a71fe8b8 1431
f8f3f926
LP
1432 e->need_process_child = true;
1433 }
1434
1435 if (ret)
1436 *ret = s;
1437
1438 TAKE_PTR(s);
fd38203a
LP
1439 return 0;
1440}
1441
b9350e70
LP
1442static int generic_exit_callback(sd_event_source *s, void *userdata) {
1443 assert(s);
1444
1445 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1446}
1447
f7262a9f 1448_public_ int sd_event_add_defer(
305f78bf 1449 sd_event *e,
151b9b96 1450 sd_event_source **ret,
718db961 1451 sd_event_handler_t callback,
151b9b96 1452 void *userdata) {
305f78bf 1453
ec766a51 1454 _cleanup_(source_freep) sd_event_source *s = NULL;
fd38203a
LP
1455 int r;
1456
305f78bf 1457 assert_return(e, -EINVAL);
b937d761 1458 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 1459 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 1460 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 1461
b9350e70
LP
1462 if (!callback)
1463 callback = generic_exit_callback;
1464
a71fe8b8 1465 s = source_new(e, !ret, SOURCE_DEFER);
fd38203a
LP
1466 if (!s)
1467 return -ENOMEM;
1468
1469 s->defer.callback = callback;
1470 s->userdata = userdata;
baf76283 1471 s->enabled = SD_EVENT_ONESHOT;
fd38203a
LP
1472
1473 r = source_set_pending(s, true);
ec766a51 1474 if (r < 0)
fd38203a 1475 return r;
fd38203a 1476
a71fe8b8
LP
1477 if (ret)
1478 *ret = s;
ec766a51 1479 TAKE_PTR(s);
a71fe8b8 1480
fd38203a
LP
1481 return 0;
1482}
1483
6e9feda3
LP
1484_public_ int sd_event_add_post(
1485 sd_event *e,
1486 sd_event_source **ret,
1487 sd_event_handler_t callback,
1488 void *userdata) {
1489
ec766a51 1490 _cleanup_(source_freep) sd_event_source *s = NULL;
6e9feda3
LP
1491 int r;
1492
1493 assert_return(e, -EINVAL);
b937d761 1494 assert_return(e = event_resolve(e), -ENOPKG);
6e9feda3
LP
1495 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1496 assert_return(!event_pid_changed(e), -ECHILD);
1497
b9350e70
LP
1498 if (!callback)
1499 callback = generic_exit_callback;
1500
a71fe8b8 1501 s = source_new(e, !ret, SOURCE_POST);
6e9feda3
LP
1502 if (!s)
1503 return -ENOMEM;
1504
1505 s->post.callback = callback;
1506 s->userdata = userdata;
1507 s->enabled = SD_EVENT_ON;
1508
de7fef4b 1509 r = set_ensure_put(&e->post_sources, NULL, s);
ec766a51 1510 if (r < 0)
6e9feda3 1511 return r;
de7fef4b 1512 assert(r > 0);
6e9feda3 1513
a71fe8b8
LP
1514 if (ret)
1515 *ret = s;
ec766a51 1516 TAKE_PTR(s);
a71fe8b8 1517
6e9feda3
LP
1518 return 0;
1519}
1520
6203e07a 1521_public_ int sd_event_add_exit(
305f78bf 1522 sd_event *e,
151b9b96 1523 sd_event_source **ret,
718db961 1524 sd_event_handler_t callback,
151b9b96 1525 void *userdata) {
305f78bf 1526
ec766a51 1527 _cleanup_(source_freep) sd_event_source *s = NULL;
da7e457c
LP
1528 int r;
1529
1530 assert_return(e, -EINVAL);
b937d761 1531 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
1532 assert_return(callback, -EINVAL);
1533 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1534 assert_return(!event_pid_changed(e), -ECHILD);
1535
c983e776
EV
1536 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1537 if (r < 0)
1538 return r;
da7e457c 1539
a71fe8b8 1540 s = source_new(e, !ret, SOURCE_EXIT);
fd38203a 1541 if (!s)
da7e457c 1542 return -ENOMEM;
fd38203a 1543
6203e07a 1544 s->exit.callback = callback;
da7e457c 1545 s->userdata = userdata;
6203e07a 1546 s->exit.prioq_index = PRIOQ_IDX_NULL;
baf76283 1547 s->enabled = SD_EVENT_ONESHOT;
da7e457c 1548
6203e07a 1549 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
ec766a51 1550 if (r < 0)
da7e457c 1551 return r;
da7e457c 1552
a71fe8b8
LP
1553 if (ret)
1554 *ret = s;
ec766a51 1555 TAKE_PTR(s);
a71fe8b8 1556
da7e457c
LP
1557 return 0;
1558}
1559
97ef5391
LP
1560static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1561 assert(e);
1562
1563 if (!d)
1564 return;
1565
1566 assert(hashmap_isempty(d->inodes));
1567 assert(hashmap_isempty(d->wd));
1568
1569 if (d->buffer_filled > 0)
1570 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1571
1572 hashmap_free(d->inodes);
1573 hashmap_free(d->wd);
1574
1575 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1576
1577 if (d->fd >= 0) {
1578 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1579 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1580
1581 safe_close(d->fd);
1582 }
1583 free(d);
1584}
1585
1586static int event_make_inotify_data(
1587 sd_event *e,
1588 int64_t priority,
1589 struct inotify_data **ret) {
1590
1591 _cleanup_close_ int fd = -1;
1592 struct inotify_data *d;
97ef5391
LP
1593 int r;
1594
1595 assert(e);
1596
1597 d = hashmap_get(e->inotify_data, &priority);
1598 if (d) {
1599 if (ret)
1600 *ret = d;
1601 return 0;
1602 }
1603
1604 fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
1605 if (fd < 0)
1606 return -errno;
1607
1608 fd = fd_move_above_stdio(fd);
1609
1610 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1611 if (r < 0)
1612 return r;
1613
1614 d = new(struct inotify_data, 1);
1615 if (!d)
1616 return -ENOMEM;
1617
1618 *d = (struct inotify_data) {
1619 .wakeup = WAKEUP_INOTIFY_DATA,
1620 .fd = TAKE_FD(fd),
1621 .priority = priority,
1622 };
1623
1624 r = hashmap_put(e->inotify_data, &d->priority, d);
1625 if (r < 0) {
1626 d->fd = safe_close(d->fd);
1627 free(d);
1628 return r;
1629 }
1630
1eac7948 1631 struct epoll_event ev = {
97ef5391
LP
1632 .events = EPOLLIN,
1633 .data.ptr = d,
1634 };
1635
1636 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1637 r = -errno;
1638 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1639 * remove the fd from the epoll first, which we don't want as we couldn't
1640 * add it in the first place. */
1641 event_free_inotify_data(e, d);
1642 return r;
1643 }
1644
1645 if (ret)
1646 *ret = d;
1647
1648 return 1;
1649}
1650
7a08d314 1651static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
90c88092 1652 int r;
97ef5391
LP
1653
1654 assert(x);
1655 assert(y);
1656
90c88092
YW
1657 r = CMP(x->dev, y->dev);
1658 if (r != 0)
1659 return r;
97ef5391 1660
6dd91b36 1661 return CMP(x->ino, y->ino);
97ef5391
LP
1662}
1663
7a08d314
YW
1664static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1665 assert(d);
97ef5391
LP
1666
1667 siphash24_compress(&d->dev, sizeof(d->dev), state);
1668 siphash24_compress(&d->ino, sizeof(d->ino), state);
1669}
1670
7a08d314 1671DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
97ef5391
LP
1672
1673static void event_free_inode_data(
1674 sd_event *e,
1675 struct inode_data *d) {
1676
1677 assert(e);
1678
1679 if (!d)
1680 return;
1681
1682 assert(!d->event_sources);
1683
1684 if (d->fd >= 0) {
1685 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1686 safe_close(d->fd);
1687 }
1688
1689 if (d->inotify_data) {
1690
1691 if (d->wd >= 0) {
1692 if (d->inotify_data->fd >= 0) {
1693 /* So here's a problem. At the time this runs the watch descriptor might already be
1694 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1695 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1696 * likely case to happen. */
1697
1698 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1699 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1700 }
1701
1702 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1703 }
1704
1705 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1706 }
1707
1708 free(d);
1709}
1710
1711static void event_gc_inode_data(
1712 sd_event *e,
1713 struct inode_data *d) {
1714
1715 struct inotify_data *inotify_data;
1716
1717 assert(e);
1718
1719 if (!d)
1720 return;
1721
1722 if (d->event_sources)
1723 return;
1724
1725 inotify_data = d->inotify_data;
1726 event_free_inode_data(e, d);
1727
1728 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1729 event_free_inotify_data(e, inotify_data);
1730}
1731
1732static int event_make_inode_data(
1733 sd_event *e,
1734 struct inotify_data *inotify_data,
1735 dev_t dev,
1736 ino_t ino,
1737 struct inode_data **ret) {
1738
1739 struct inode_data *d, key;
1740 int r;
1741
1742 assert(e);
1743 assert(inotify_data);
1744
1745 key = (struct inode_data) {
1746 .ino = ino,
1747 .dev = dev,
1748 };
1749
1750 d = hashmap_get(inotify_data->inodes, &key);
1751 if (d) {
1752 if (ret)
1753 *ret = d;
1754
1755 return 0;
1756 }
1757
1758 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1759 if (r < 0)
1760 return r;
1761
1762 d = new(struct inode_data, 1);
1763 if (!d)
1764 return -ENOMEM;
1765
1766 *d = (struct inode_data) {
1767 .dev = dev,
1768 .ino = ino,
1769 .wd = -1,
1770 .fd = -1,
1771 .inotify_data = inotify_data,
1772 };
1773
1774 r = hashmap_put(inotify_data->inodes, d, d);
1775 if (r < 0) {
1776 free(d);
1777 return r;
1778 }
1779
1780 if (ret)
1781 *ret = d;
1782
1783 return 1;
1784}
1785
1786static uint32_t inode_data_determine_mask(struct inode_data *d) {
1787 bool excl_unlink = true;
1788 uint32_t combined = 0;
1789 sd_event_source *s;
1790
1791 assert(d);
1792
1793 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1794 * the IN_EXCL_UNLINK flag is ANDed instead.
1795 *
1796 * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
1797 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
f21f31b2 1798 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
97ef5391
LP
1799 * events we don't care for client-side. */
1800
1801 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1802
1803 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1804 excl_unlink = false;
1805
1806 combined |= s->inotify.mask;
1807 }
1808
1809 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1810}
1811
1812static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1813 uint32_t combined_mask;
1814 int wd, r;
1815
1816 assert(d);
1817 assert(d->fd >= 0);
1818
1819 combined_mask = inode_data_determine_mask(d);
1820
1821 if (d->wd >= 0 && combined_mask == d->combined_mask)
1822 return 0;
1823
1824 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1825 if (r < 0)
1826 return r;
1827
1828 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1829 if (wd < 0)
1830 return -errno;
1831
1832 if (d->wd < 0) {
1833 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1834 if (r < 0) {
1835 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1836 return r;
1837 }
1838
1839 d->wd = wd;
1840
1841 } else if (d->wd != wd) {
1842
1843 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1844 (void) inotify_rm_watch(d->fd, wd);
1845 return -EINVAL;
1846 }
1847
1848 d->combined_mask = combined_mask;
1849 return 1;
1850}
1851
b9350e70
LP
1852static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1853 assert(s);
1854
1855 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1856}
1857
97ef5391
LP
1858_public_ int sd_event_add_inotify(
1859 sd_event *e,
1860 sd_event_source **ret,
1861 const char *path,
1862 uint32_t mask,
1863 sd_event_inotify_handler_t callback,
1864 void *userdata) {
1865
97ef5391
LP
1866 struct inotify_data *inotify_data = NULL;
1867 struct inode_data *inode_data = NULL;
1868 _cleanup_close_ int fd = -1;
8c75fe17 1869 _cleanup_(source_freep) sd_event_source *s = NULL;
97ef5391
LP
1870 struct stat st;
1871 int r;
1872
1873 assert_return(e, -EINVAL);
1874 assert_return(e = event_resolve(e), -ENOPKG);
1875 assert_return(path, -EINVAL);
97ef5391
LP
1876 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1877 assert_return(!event_pid_changed(e), -ECHILD);
1878
b9350e70
LP
1879 if (!callback)
1880 callback = inotify_exit_callback;
1881
97ef5391
LP
1882 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1883 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1884 * the user can't use them for us. */
1885 if (mask & IN_MASK_ADD)
1886 return -EINVAL;
1887
1888 fd = open(path, O_PATH|O_CLOEXEC|
1889 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1890 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1891 if (fd < 0)
1892 return -errno;
1893
1894 if (fstat(fd, &st) < 0)
1895 return -errno;
1896
1897 s = source_new(e, !ret, SOURCE_INOTIFY);
1898 if (!s)
1899 return -ENOMEM;
1900
1901 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1902 s->inotify.mask = mask;
1903 s->inotify.callback = callback;
1904 s->userdata = userdata;
1905
1906 /* Allocate an inotify object for this priority, and an inode object within it */
1907 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1908 if (r < 0)
8c75fe17 1909 return r;
97ef5391
LP
1910
1911 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
8c75fe17
ZJS
1912 if (r < 0) {
1913 event_free_inotify_data(e, inotify_data);
1914 return r;
1915 }
97ef5391
LP
1916
1917 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1918 * the event source, until then, for which we need the original inode. */
1919 if (inode_data->fd < 0) {
1920 inode_data->fd = TAKE_FD(fd);
1921 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1922 }
1923
1924 /* Link our event source to the inode data object */
1925 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1926 s->inotify.inode_data = inode_data;
1927
97ef5391
LP
1928 /* Actually realize the watch now */
1929 r = inode_data_realize_watch(e, inode_data);
1930 if (r < 0)
8c75fe17 1931 return r;
97ef5391
LP
1932
1933 (void) sd_event_source_set_description(s, path);
1934
1935 if (ret)
1936 *ret = s;
8c75fe17 1937 TAKE_PTR(s);
97ef5391
LP
1938
1939 return 0;
97ef5391
LP
1940}
1941
8301aa0b 1942static sd_event_source* event_source_free(sd_event_source *s) {
6680dd6b
LP
1943 if (!s)
1944 return NULL;
da7e457c 1945
8301aa0b
YW
1946 /* Here's a special hack: when we are called from a
1947 * dispatch handler we won't free the event source
1948 * immediately, but we will detach the fd from the
1949 * epoll. This way it is safe for the caller to unref
1950 * the event source and immediately close the fd, but
1951 * we still retain a valid event source object after
1952 * the callback. */
fd38203a 1953
8301aa0b
YW
1954 if (s->dispatching) {
1955 if (s->type == SOURCE_IO)
1956 source_io_unregister(s);
fd38203a 1957
8301aa0b
YW
1958 source_disconnect(s);
1959 } else
1960 source_free(s);
fd38203a
LP
1961
1962 return NULL;
1963}
1964
8301aa0b
YW
1965DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1966
356779df 1967_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
f7f53e9e 1968 assert_return(s, -EINVAL);
f4b2933e 1969 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 1970
356779df 1971 return free_and_strdup(&s->description, description);
f7f53e9e
TG
1972}
1973
356779df 1974_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
f7f53e9e 1975 assert_return(s, -EINVAL);
356779df 1976 assert_return(description, -EINVAL);
f4b2933e 1977 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 1978
7d92a1a4
ZJS
1979 if (!s->description)
1980 return -ENXIO;
1981
356779df 1982 *description = s->description;
f7f53e9e
TG
1983 return 0;
1984}
1985
adcc4ca3 1986_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
305f78bf 1987 assert_return(s, NULL);
eaa3cbef
LP
1988
1989 return s->event;
1990}
1991
f7262a9f 1992_public_ int sd_event_source_get_pending(sd_event_source *s) {
305f78bf 1993 assert_return(s, -EINVAL);
6203e07a 1994 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c 1995 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 1996 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
1997
1998 return s->pending;
1999}
2000
f7262a9f 2001_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
305f78bf
LP
2002 assert_return(s, -EINVAL);
2003 assert_return(s->type == SOURCE_IO, -EDOM);
2004 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2005
2006 return s->io.fd;
2007}
2008
30caf8f3
LP
2009_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2010 int r;
2011
2012 assert_return(s, -EINVAL);
8ac43fee 2013 assert_return(fd >= 0, -EBADF);
30caf8f3
LP
2014 assert_return(s->type == SOURCE_IO, -EDOM);
2015 assert_return(!event_pid_changed(s->event), -ECHILD);
2016
2017 if (s->io.fd == fd)
2018 return 0;
2019
2020 if (s->enabled == SD_EVENT_OFF) {
2021 s->io.fd = fd;
2022 s->io.registered = false;
2023 } else {
2024 int saved_fd;
2025
2026 saved_fd = s->io.fd;
2027 assert(s->io.registered);
2028
2029 s->io.fd = fd;
2030 s->io.registered = false;
2031
2032 r = source_io_register(s, s->enabled, s->io.events);
2033 if (r < 0) {
2034 s->io.fd = saved_fd;
2035 s->io.registered = true;
2036 return r;
2037 }
2038
5a795bff 2039 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
30caf8f3
LP
2040 }
2041
2042 return 0;
2043}
2044
ab93297c
NM
2045_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2046 assert_return(s, -EINVAL);
2047 assert_return(s->type == SOURCE_IO, -EDOM);
2048
2049 return s->io.owned;
2050}
2051
2052_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2053 assert_return(s, -EINVAL);
2054 assert_return(s->type == SOURCE_IO, -EDOM);
2055
2056 s->io.owned = own;
2057 return 0;
2058}
2059
f7262a9f 2060_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
305f78bf
LP
2061 assert_return(s, -EINVAL);
2062 assert_return(events, -EINVAL);
2063 assert_return(s->type == SOURCE_IO, -EDOM);
2064 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2065
2066 *events = s->io.events;
2067 return 0;
2068}
2069
f7262a9f 2070_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
fd38203a
LP
2071 int r;
2072
305f78bf
LP
2073 assert_return(s, -EINVAL);
2074 assert_return(s->type == SOURCE_IO, -EDOM);
2a16a986 2075 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 2076 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2077 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2078
b63c8d4f
DH
2079 /* edge-triggered updates are never skipped, so we can reset edges */
2080 if (s->io.events == events && !(events & EPOLLET))
fd38203a
LP
2081 return 0;
2082
2a0dc6cd
LP
2083 r = source_set_pending(s, false);
2084 if (r < 0)
2085 return r;
2086
baf76283 2087 if (s->enabled != SD_EVENT_OFF) {
e4715127 2088 r = source_io_register(s, s->enabled, events);
fd38203a
LP
2089 if (r < 0)
2090 return r;
2091 }
2092
2093 s->io.events = events;
2094
2095 return 0;
2096}
2097
f7262a9f 2098_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
305f78bf
LP
2099 assert_return(s, -EINVAL);
2100 assert_return(revents, -EINVAL);
2101 assert_return(s->type == SOURCE_IO, -EDOM);
2102 assert_return(s->pending, -ENODATA);
2103 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2104
2105 *revents = s->io.revents;
2106 return 0;
2107}
2108
f7262a9f 2109_public_ int sd_event_source_get_signal(sd_event_source *s) {
305f78bf
LP
2110 assert_return(s, -EINVAL);
2111 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2112 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2113
2114 return s->signal.sig;
2115}
2116
31927c16 2117_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
305f78bf
LP
2118 assert_return(s, -EINVAL);
2119 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2120
6680b8d1
ME
2121 *priority = s->priority;
2122 return 0;
fd38203a
LP
2123}
2124
31927c16 2125_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
97ef5391
LP
2126 bool rm_inotify = false, rm_inode = false;
2127 struct inotify_data *new_inotify_data = NULL;
2128 struct inode_data *new_inode_data = NULL;
9da4cb2b
LP
2129 int r;
2130
305f78bf 2131 assert_return(s, -EINVAL);
da7e457c 2132 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2133 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2134
2135 if (s->priority == priority)
2136 return 0;
2137
97ef5391
LP
2138 if (s->type == SOURCE_INOTIFY) {
2139 struct inode_data *old_inode_data;
2140
2141 assert(s->inotify.inode_data);
2142 old_inode_data = s->inotify.inode_data;
2143
2144 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2145 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2146 * events we allow priority changes only until the first following iteration. */
2147 if (old_inode_data->fd < 0)
2148 return -EOPNOTSUPP;
2149
2150 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2151 if (r < 0)
2152 return r;
2153 rm_inotify = r > 0;
2154
2155 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2156 if (r < 0)
2157 goto fail;
2158 rm_inode = r > 0;
2159
2160 if (new_inode_data->fd < 0) {
2161 /* Duplicate the fd for the new inode object if we don't have any yet */
2162 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2163 if (new_inode_data->fd < 0) {
2164 r = -errno;
2165 goto fail;
2166 }
2167
2168 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2169 }
2170
2171 /* Move the event source to the new inode data structure */
2172 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2173 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2174 s->inotify.inode_data = new_inode_data;
2175
2176 /* Now create the new watch */
2177 r = inode_data_realize_watch(s->event, new_inode_data);
2178 if (r < 0) {
2179 /* Move it back */
2180 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2181 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2182 s->inotify.inode_data = old_inode_data;
2183 goto fail;
2184 }
2185
2186 s->priority = priority;
2187
2188 event_gc_inode_data(s->event, old_inode_data);
2189
2190 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
9da4cb2b
LP
2191 struct signal_data *old, *d;
2192
2193 /* Move us from the signalfd belonging to the old
2194 * priority to the signalfd of the new priority */
2195
2196 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2197
2198 s->priority = priority;
2199
2200 r = event_make_signal_data(s->event, s->signal.sig, &d);
2201 if (r < 0) {
2202 s->priority = old->priority;
2203 return r;
2204 }
2205
2206 event_unmask_signal_data(s->event, old, s->signal.sig);
2207 } else
2208 s->priority = priority;
fd38203a
LP
2209
2210 if (s->pending)
c2ba3ad6 2211 prioq_reshuffle(s->event->pending, s, &s->pending_index);
fd38203a
LP
2212
2213 if (s->prepare)
c2ba3ad6 2214 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
fd38203a 2215
6203e07a
LP
2216 if (s->type == SOURCE_EXIT)
2217 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 2218
fd38203a 2219 return 0;
97ef5391
LP
2220
2221fail:
2222 if (rm_inode)
2223 event_free_inode_data(s->event, new_inode_data);
2224
2225 if (rm_inotify)
2226 event_free_inotify_data(s->event, new_inotify_data);
2227
2228 return r;
fd38203a
LP
2229}
2230
f7262a9f 2231_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
305f78bf 2232 assert_return(s, -EINVAL);
305f78bf 2233 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2234
08c1eb0e
ZJS
2235 if (m)
2236 *m = s->enabled;
2237 return s->enabled != SD_EVENT_OFF;
fd38203a
LP
2238}
2239
f7262a9f 2240_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
fd38203a
LP
2241 int r;
2242
305f78bf 2243 assert_return(s, -EINVAL);
945c2931 2244 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
305f78bf 2245 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2246
cc567911
LP
2247 /* If we are dead anyway, we are fine with turning off
2248 * sources, but everything else needs to fail. */
2249 if (s->event->state == SD_EVENT_FINISHED)
2250 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2251
baf76283 2252 if (s->enabled == m)
fd38203a
LP
2253 return 0;
2254
baf76283 2255 if (m == SD_EVENT_OFF) {
fd38203a 2256
ac989a78
LP
2257 /* Unset the pending flag when this event source is disabled */
2258 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2259 r = source_set_pending(s, false);
2260 if (r < 0)
2261 return r;
2262 }
2263
fd38203a
LP
2264 switch (s->type) {
2265
2266 case SOURCE_IO:
366e6411 2267 source_io_unregister(s);
baf76283 2268 s->enabled = m;
fd38203a
LP
2269 break;
2270
6a0f1f6d 2271 case SOURCE_TIME_REALTIME:
a8548816 2272 case SOURCE_TIME_BOOTTIME:
6a0f1f6d
LP
2273 case SOURCE_TIME_MONOTONIC:
2274 case SOURCE_TIME_REALTIME_ALARM:
2275 case SOURCE_TIME_BOOTTIME_ALARM: {
2276 struct clock_data *d;
fd38203a 2277
baf76283 2278 s->enabled = m;
6a0f1f6d
LP
2279 d = event_get_clock_data(s->event, s->type);
2280 assert(d);
2281
2282 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2283 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2284 d->needs_rearm = true;
fd38203a 2285 break;
6a0f1f6d 2286 }
fd38203a
LP
2287
2288 case SOURCE_SIGNAL:
baf76283 2289 s->enabled = m;
4807d2d0 2290
9da4cb2b 2291 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
fd38203a
LP
2292 break;
2293
2294 case SOURCE_CHILD:
baf76283 2295 s->enabled = m;
fd38203a 2296
baf76283
LP
2297 assert(s->event->n_enabled_child_sources > 0);
2298 s->event->n_enabled_child_sources--;
fd38203a 2299
f8f3f926
LP
2300 if (EVENT_SOURCE_WATCH_PIDFD(s))
2301 source_child_pidfd_unregister(s);
2302 else
2303 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2304
fd38203a
LP
2305 break;
2306
6203e07a 2307 case SOURCE_EXIT:
305f78bf 2308 s->enabled = m;
6203e07a 2309 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf
LP
2310 break;
2311
2312 case SOURCE_DEFER:
6e9feda3 2313 case SOURCE_POST:
97ef5391 2314 case SOURCE_INOTIFY:
baf76283 2315 s->enabled = m;
fd38203a 2316 break;
9d3e3aa5 2317
6a0f1f6d 2318 default:
9d3e3aa5 2319 assert_not_reached("Wut? I shouldn't exist.");
fd38203a
LP
2320 }
2321
2322 } else {
ac989a78
LP
2323
2324 /* Unset the pending flag when this event source is enabled */
2325 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2326 r = source_set_pending(s, false);
2327 if (r < 0)
2328 return r;
2329 }
2330
fd38203a
LP
2331 switch (s->type) {
2332
2333 case SOURCE_IO:
2334 r = source_io_register(s, m, s->io.events);
2335 if (r < 0)
2336 return r;
2337
baf76283 2338 s->enabled = m;
fd38203a
LP
2339 break;
2340
6a0f1f6d 2341 case SOURCE_TIME_REALTIME:
a8548816 2342 case SOURCE_TIME_BOOTTIME:
6a0f1f6d
LP
2343 case SOURCE_TIME_MONOTONIC:
2344 case SOURCE_TIME_REALTIME_ALARM:
2345 case SOURCE_TIME_BOOTTIME_ALARM: {
2346 struct clock_data *d;
fd38203a 2347
baf76283 2348 s->enabled = m;
6a0f1f6d
LP
2349 d = event_get_clock_data(s->event, s->type);
2350 assert(d);
2351
2352 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2353 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2354 d->needs_rearm = true;
fd38203a 2355 break;
6a0f1f6d 2356 }
fd38203a
LP
2357
2358 case SOURCE_SIGNAL:
4807d2d0
ZJS
2359
2360 s->enabled = m;
9da4cb2b
LP
2361
2362 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2363 if (r < 0) {
2364 s->enabled = SD_EVENT_OFF;
2365 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2366 return r;
2367 }
2368
fd38203a
LP
2369 break;
2370
2371 case SOURCE_CHILD:
4807d2d0 2372
9da4cb2b 2373 if (s->enabled == SD_EVENT_OFF)
4807d2d0 2374 s->event->n_enabled_child_sources++;
7a0d4a3d
DH
2375
2376 s->enabled = m;
9da4cb2b 2377
f8f3f926
LP
2378 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2379 /* yes, we have pidfd */
2380
2381 r = source_child_pidfd_register(s, s->enabled);
2382 if (r < 0) {
2383 s->enabled = SD_EVENT_OFF;
2384 s->event->n_enabled_child_sources--;
2385 return r;
2386 }
2387 } else {
2388 /* no pidfd, or something other to watch for than WEXITED */
2389
2390 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2391 if (r < 0) {
2392 s->enabled = SD_EVENT_OFF;
2393 s->event->n_enabled_child_sources--;
2394 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2395 return r;
2396 }
9da4cb2b
LP
2397 }
2398
fd38203a
LP
2399 break;
2400
6203e07a 2401 case SOURCE_EXIT:
305f78bf 2402 s->enabled = m;
6203e07a 2403 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf
LP
2404 break;
2405
2406 case SOURCE_DEFER:
6e9feda3 2407 case SOURCE_POST:
97ef5391 2408 case SOURCE_INOTIFY:
baf76283 2409 s->enabled = m;
fd38203a 2410 break;
9d3e3aa5 2411
6a0f1f6d 2412 default:
9d3e3aa5 2413 assert_not_reached("Wut? I shouldn't exist.");
fd38203a
LP
2414 }
2415 }
2416
2417 if (s->pending)
2418 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2419
2420 if (s->prepare)
2421 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2422
2423 return 0;
2424}
2425
f7262a9f 2426_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2427 assert_return(s, -EINVAL);
2428 assert_return(usec, -EINVAL);
6a0f1f6d 2429 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf 2430 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2431
2432 *usec = s->time.next;
2433 return 0;
2434}
2435
f7262a9f 2436_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
6a0f1f6d 2437 struct clock_data *d;
2a0dc6cd 2438 int r;
6a0f1f6d 2439
305f78bf 2440 assert_return(s, -EINVAL);
6a0f1f6d 2441 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2442 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2443 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2444
2a0dc6cd
LP
2445 r = source_set_pending(s, false);
2446 if (r < 0)
2447 return r;
2576a19e 2448
2a0dc6cd 2449 s->time.next = usec;
fd38203a 2450
6a0f1f6d
LP
2451 d = event_get_clock_data(s->event, s->type);
2452 assert(d);
2453
2454 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2455 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2456 d->needs_rearm = true;
fd38203a
LP
2457
2458 return 0;
2459}
2460
d6a83dc4
LP
2461_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2462 usec_t t;
2463 int r;
2464
2465 assert_return(s, -EINVAL);
2466 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2467
2468 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2469 if (r < 0)
2470 return r;
2471
2472 if (usec >= USEC_INFINITY - t)
2473 return -EOVERFLOW;
2474
2475 return sd_event_source_set_time(s, t + usec);
2476}
2477
f7262a9f 2478_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
305f78bf
LP
2479 assert_return(s, -EINVAL);
2480 assert_return(usec, -EINVAL);
6a0f1f6d 2481 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf
LP
2482 assert_return(!event_pid_changed(s->event), -ECHILD);
2483
2484 *usec = s->time.accuracy;
2485 return 0;
2486}
2487
f7262a9f 2488_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
6a0f1f6d 2489 struct clock_data *d;
2a0dc6cd 2490 int r;
6a0f1f6d 2491
305f78bf
LP
2492 assert_return(s, -EINVAL);
2493 assert_return(usec != (uint64_t) -1, -EINVAL);
6a0f1f6d 2494 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2495 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2496 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2497
2a0dc6cd
LP
2498 r = source_set_pending(s, false);
2499 if (r < 0)
2500 return r;
2501
eaa3cbef
LP
2502 if (usec == 0)
2503 usec = DEFAULT_ACCURACY_USEC;
2504
eaa3cbef
LP
2505 s->time.accuracy = usec;
2506
6a0f1f6d
LP
2507 d = event_get_clock_data(s->event, s->type);
2508 assert(d);
2509
2510 prioq_reshuffle(d->latest, s, &s->time.latest_index);
212bbb17 2511 d->needs_rearm = true;
6a0f1f6d
LP
2512
2513 return 0;
2514}
2515
2516_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2517 assert_return(s, -EINVAL);
2518 assert_return(clock, -EINVAL);
2519 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2520 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2521
6a0f1f6d 2522 *clock = event_source_type_to_clock(s->type);
eaa3cbef
LP
2523 return 0;
2524}
2525
f7262a9f 2526_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
4bee8012
LP
2527 assert_return(s, -EINVAL);
2528 assert_return(pid, -EINVAL);
2529 assert_return(s->type == SOURCE_CHILD, -EDOM);
2530 assert_return(!event_pid_changed(s->event), -ECHILD);
2531
2532 *pid = s->child.pid;
2533 return 0;
2534}
2535
f8f3f926
LP
2536_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2537 assert_return(s, -EINVAL);
2538 assert_return(s->type == SOURCE_CHILD, -EDOM);
2539 assert_return(!event_pid_changed(s->event), -ECHILD);
2540
2541 if (s->child.pidfd < 0)
2542 return -EOPNOTSUPP;
2543
2544 return s->child.pidfd;
2545}
2546
2547_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2548 assert_return(s, -EINVAL);
2549 assert_return(s->type == SOURCE_CHILD, -EDOM);
2550 assert_return(!event_pid_changed(s->event), -ECHILD);
2551 assert_return(SIGNAL_VALID(sig), -EINVAL);
2552
2553 /* If we already have seen indication the process exited refuse sending a signal early. This way we
2554 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2555 * available. */
2556 if (s->child.exited)
2557 return -ESRCH;
2558
2559 if (s->child.pidfd >= 0) {
2560 siginfo_t copy;
2561
2562 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2563 * structure here */
2564 if (si)
2565 copy = *si;
2566
2567 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2568 /* Let's propagate the error only if the system call is not implemented or prohibited */
2569 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2570 return -errno;
2571 } else
2572 return 0;
2573 }
2574
2575 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2576 * this here. */
2577 if (flags != 0)
2578 return -EOPNOTSUPP;
2579
2580 if (si) {
2581 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2582 siginfo_t copy = *si;
2583
2584 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2585 return -errno;
2586 } else if (kill(s->child.pid, sig) < 0)
2587 return -errno;
2588
2589 return 0;
2590}
2591
2592_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2593 assert_return(s, -EINVAL);
2594 assert_return(s->type == SOURCE_CHILD, -EDOM);
2595
2596 if (s->child.pidfd < 0)
2597 return -EOPNOTSUPP;
2598
2599 return s->child.pidfd_owned;
2600}
2601
2602_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2603 assert_return(s, -EINVAL);
2604 assert_return(s->type == SOURCE_CHILD, -EDOM);
2605
2606 if (s->child.pidfd < 0)
2607 return -EOPNOTSUPP;
2608
2609 s->child.pidfd_owned = own;
2610 return 0;
2611}
2612
2613_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2614 assert_return(s, -EINVAL);
2615 assert_return(s->type == SOURCE_CHILD, -EDOM);
2616
2617 return s->child.process_owned;
2618}
2619
2620_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2621 assert_return(s, -EINVAL);
2622 assert_return(s->type == SOURCE_CHILD, -EDOM);
2623
2624 s->child.process_owned = own;
2625 return 0;
2626}
2627
97ef5391
LP
2628_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2629 assert_return(s, -EINVAL);
2630 assert_return(mask, -EINVAL);
2631 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2632 assert_return(!event_pid_changed(s->event), -ECHILD);
2633
2634 *mask = s->inotify.mask;
2635 return 0;
2636}
2637
718db961 2638_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
fd38203a
LP
2639 int r;
2640
da7e457c 2641 assert_return(s, -EINVAL);
6203e07a 2642 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c
LP
2643 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2644 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a
LP
2645
2646 if (s->prepare == callback)
2647 return 0;
2648
2649 if (callback && s->prepare) {
2650 s->prepare = callback;
2651 return 0;
2652 }
2653
2654 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2655 if (r < 0)
2656 return r;
2657
2658 s->prepare = callback;
2659
2660 if (callback) {
2661 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2662 if (r < 0)
2663 return r;
2664 } else
2665 prioq_remove(s->event->prepare, s, &s->prepare_index);
2666
2667 return 0;
2668}
2669
f7262a9f 2670_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
da7e457c 2671 assert_return(s, NULL);
fd38203a
LP
2672
2673 return s->userdata;
2674}
2675
8f726607
LP
2676_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2677 void *ret;
2678
2679 assert_return(s, NULL);
2680
2681 ret = s->userdata;
2682 s->userdata = userdata;
2683
2684 return ret;
2685}
2686
c2ba3ad6
LP
2687static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2688 usec_t c;
2689 assert(e);
2690 assert(a <= b);
2691
2692 if (a <= 0)
2693 return 0;
393003e1
LP
2694 if (a >= USEC_INFINITY)
2695 return USEC_INFINITY;
c2ba3ad6
LP
2696
2697 if (b <= a + 1)
2698 return a;
2699
52444dc4
LP
2700 initialize_perturb(e);
2701
c2ba3ad6
LP
2702 /*
2703 Find a good time to wake up again between times a and b. We
2704 have two goals here:
2705
2706 a) We want to wake up as seldom as possible, hence prefer
2707 later times over earlier times.
2708
2709 b) But if we have to wake up, then let's make sure to
2710 dispatch as much as possible on the entire system.
2711
2712 We implement this by waking up everywhere at the same time
850516e0 2713 within any given minute if we can, synchronised via the
c2ba3ad6 2714 perturbation value determined from the boot ID. If we can't,
ba276c81
LP
2715 then we try to find the same spot in every 10s, then 1s and
2716 then 250ms step. Otherwise, we pick the last possible time
2717 to wake up.
c2ba3ad6
LP
2718 */
2719
850516e0
LP
2720 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2721 if (c >= b) {
2722 if (_unlikely_(c < USEC_PER_MINUTE))
2723 return b;
2724
2725 c -= USEC_PER_MINUTE;
2726 }
2727
ba276c81
LP
2728 if (c >= a)
2729 return c;
2730
2731 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2732 if (c >= b) {
2733 if (_unlikely_(c < USEC_PER_SEC*10))
2734 return b;
2735
2736 c -= USEC_PER_SEC*10;
2737 }
2738
850516e0
LP
2739 if (c >= a)
2740 return c;
2741
2742 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
c2ba3ad6
LP
2743 if (c >= b) {
2744 if (_unlikely_(c < USEC_PER_SEC))
2745 return b;
2746
2747 c -= USEC_PER_SEC;
2748 }
2749
2750 if (c >= a)
2751 return c;
2752
2753 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2754 if (c >= b) {
2755 if (_unlikely_(c < USEC_PER_MSEC*250))
2756 return b;
2757
2758 c -= USEC_PER_MSEC*250;
2759 }
2760
2761 if (c >= a)
2762 return c;
2763
2764 return b;
2765}
2766
fd38203a
LP
2767static int event_arm_timer(
2768 sd_event *e,
6a0f1f6d 2769 struct clock_data *d) {
fd38203a
LP
2770
2771 struct itimerspec its = {};
c2ba3ad6
LP
2772 sd_event_source *a, *b;
2773 usec_t t;
fd38203a 2774
cde93897 2775 assert(e);
6a0f1f6d 2776 assert(d);
fd38203a 2777
d06441da 2778 if (!d->needs_rearm)
212bbb17
TG
2779 return 0;
2780 else
2781 d->needs_rearm = false;
2782
6a0f1f6d 2783 a = prioq_peek(d->earliest);
393003e1 2784 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
72aedc1e 2785
6a0f1f6d 2786 if (d->fd < 0)
c57b5ca3
LP
2787 return 0;
2788
3a43da28 2789 if (d->next == USEC_INFINITY)
72aedc1e
LP
2790 return 0;
2791
2792 /* disarm */
15c689d7
LP
2793 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
2794 return -errno;
72aedc1e 2795
3a43da28 2796 d->next = USEC_INFINITY;
fd38203a 2797 return 0;
72aedc1e 2798 }
fd38203a 2799
6a0f1f6d 2800 b = prioq_peek(d->latest);
baf76283 2801 assert_se(b && b->enabled != SD_EVENT_OFF);
c2ba3ad6 2802
1bce0ffa 2803 t = sleep_between(e, a->time.next, time_event_source_latest(b));
6a0f1f6d 2804 if (d->next == t)
fd38203a
LP
2805 return 0;
2806
6a0f1f6d 2807 assert_se(d->fd >= 0);
fd38203a 2808
c2ba3ad6 2809 if (t == 0) {
fd38203a
LP
2810 /* We don' want to disarm here, just mean some time looooong ago. */
2811 its.it_value.tv_sec = 0;
2812 its.it_value.tv_nsec = 1;
2813 } else
c2ba3ad6 2814 timespec_store(&its.it_value, t);
fd38203a 2815
15c689d7 2816 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
cde93897 2817 return -errno;
fd38203a 2818
6a0f1f6d 2819 d->next = t;
fd38203a
LP
2820 return 0;
2821}
2822
9a800b56 2823static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
fd38203a
LP
2824 assert(e);
2825 assert(s);
2826 assert(s->type == SOURCE_IO);
2827
9a800b56
LP
2828 /* If the event source was already pending, we just OR in the
2829 * new revents, otherwise we reset the value. The ORing is
2830 * necessary to handle EPOLLONESHOT events properly where
2831 * readability might happen independently of writability, and
2832 * we need to keep track of both */
2833
2834 if (s->pending)
2835 s->io.revents |= revents;
2836 else
2837 s->io.revents = revents;
fd38203a 2838
fd38203a
LP
2839 return source_set_pending(s, true);
2840}
2841
72aedc1e 2842static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
fd38203a
LP
2843 uint64_t x;
2844 ssize_t ss;
2845
2846 assert(e);
da7e457c 2847 assert(fd >= 0);
72aedc1e 2848
305f78bf 2849 assert_return(events == EPOLLIN, -EIO);
fd38203a
LP
2850
2851 ss = read(fd, &x, sizeof(x));
2852 if (ss < 0) {
945c2931 2853 if (IN_SET(errno, EAGAIN, EINTR))
fd38203a
LP
2854 return 0;
2855
2856 return -errno;
2857 }
2858
8d35dae7 2859 if (_unlikely_(ss != sizeof(x)))
fd38203a
LP
2860 return -EIO;
2861
cde93897 2862 if (next)
3a43da28 2863 *next = USEC_INFINITY;
72aedc1e 2864
fd38203a
LP
2865 return 0;
2866}
2867
305f78bf
LP
2868static int process_timer(
2869 sd_event *e,
2870 usec_t n,
6a0f1f6d 2871 struct clock_data *d) {
305f78bf 2872
fd38203a
LP
2873 sd_event_source *s;
2874 int r;
2875
2876 assert(e);
6a0f1f6d 2877 assert(d);
fd38203a
LP
2878
2879 for (;;) {
6a0f1f6d 2880 s = prioq_peek(d->earliest);
fd38203a
LP
2881 if (!s ||
2882 s->time.next > n ||
baf76283 2883 s->enabled == SD_EVENT_OFF ||
fd38203a
LP
2884 s->pending)
2885 break;
2886
2887 r = source_set_pending(s, true);
2888 if (r < 0)
2889 return r;
2890
6a0f1f6d
LP
2891 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2892 prioq_reshuffle(d->latest, s, &s->time.latest_index);
e07bbb7c 2893 d->needs_rearm = true;
fd38203a
LP
2894 }
2895
2896 return 0;
2897}
2898
2899static int process_child(sd_event *e) {
2900 sd_event_source *s;
fd38203a
LP
2901 int r;
2902
2903 assert(e);
2904
c2ba3ad6
LP
2905 e->need_process_child = false;
2906
fd38203a
LP
2907 /*
2908 So, this is ugly. We iteratively invoke waitid() with P_PID
2909 + WNOHANG for each PID we wait for, instead of using
2910 P_ALL. This is because we only want to get child
2911 information of very specific child processes, and not all
2912 of them. We might not have processed the SIGCHLD even of a
2913 previous invocation and we don't want to maintain a
2914 unbounded *per-child* event queue, hence we really don't
2915 want anything flushed out of the kernel's queue that we
2916 don't care about. Since this is O(n) this means that if you
2917 have a lot of processes you probably want to handle SIGCHLD
2918 yourself.
08cd1552
LP
2919
2920 We do not reap the children here (by using WNOWAIT), this
2921 is only done after the event source is dispatched so that
2922 the callback still sees the process as a zombie.
fd38203a
LP
2923 */
2924
90e74a66 2925 HASHMAP_FOREACH(s, e->child_sources) {
fd38203a
LP
2926 assert(s->type == SOURCE_CHILD);
2927
2928 if (s->pending)
2929 continue;
2930
baf76283 2931 if (s->enabled == SD_EVENT_OFF)
fd38203a
LP
2932 continue;
2933
f8f3f926
LP
2934 if (s->child.exited)
2935 continue;
2936
2937 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
2938 continue;
2939
fd38203a 2940 zero(s->child.siginfo);
15c689d7
LP
2941 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
2942 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
fd38203a
LP
2943 return -errno;
2944
2945 if (s->child.siginfo.si_pid != 0) {
945c2931 2946 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 2947
f8f3f926
LP
2948 if (zombie)
2949 s->child.exited = true;
2950
08cd1552
LP
2951 if (!zombie && (s->child.options & WEXITED)) {
2952 /* If the child isn't dead then let's
2953 * immediately remove the state change
2954 * from the queue, since there's no
2955 * benefit in leaving it queued */
2956
2957 assert(s->child.options & (WSTOPPED|WCONTINUED));
a5d27871 2958 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
08cd1552
LP
2959 }
2960
fd38203a
LP
2961 r = source_set_pending(s, true);
2962 if (r < 0)
2963 return r;
2964 }
2965 }
2966
fd38203a
LP
2967 return 0;
2968}
2969
f8f3f926
LP
2970static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2971 assert(e);
2972 assert(s);
2973 assert(s->type == SOURCE_CHILD);
2974
2975 if (s->pending)
2976 return 0;
2977
2978 if (s->enabled == SD_EVENT_OFF)
2979 return 0;
2980
2981 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2982 return 0;
2983
2984 zero(s->child.siginfo);
2985 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2986 return -errno;
2987
2988 if (s->child.siginfo.si_pid == 0)
2989 return 0;
2990
2991 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2992 s->child.exited = true;
2993
2994 return source_set_pending(s, true);
2995}
2996
9da4cb2b 2997static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
fd38203a 2998 bool read_one = false;
fd38203a
LP
2999 int r;
3000
da7e457c 3001 assert(e);
97ef5391 3002 assert(d);
305f78bf 3003 assert_return(events == EPOLLIN, -EIO);
fd38203a 3004
9da4cb2b
LP
3005 /* If there's a signal queued on this priority and SIGCHLD is
3006 on this priority too, then make sure to recheck the
3007 children we watch. This is because we only ever dequeue
3008 the first signal per priority, and if we dequeue one, and
3009 SIGCHLD might be enqueued later we wouldn't know, but we
3010 might have higher priority children we care about hence we
3011 need to check that explicitly. */
3012
3013 if (sigismember(&d->sigset, SIGCHLD))
3014 e->need_process_child = true;
3015
3016 /* If there's already an event source pending for this
3017 * priority we don't read another */
3018 if (d->current)
3019 return 0;
3020
fd38203a 3021 for (;;) {
0eb2e0e3 3022 struct signalfd_siginfo si;
7057bd99 3023 ssize_t n;
92daebc0 3024 sd_event_source *s = NULL;
fd38203a 3025
9da4cb2b 3026 n = read(d->fd, &si, sizeof(si));
7057bd99 3027 if (n < 0) {
945c2931 3028 if (IN_SET(errno, EAGAIN, EINTR))
fd38203a
LP
3029 return read_one;
3030
3031 return -errno;
3032 }
3033
7057bd99 3034 if (_unlikely_(n != sizeof(si)))
fd38203a
LP
3035 return -EIO;
3036
6eb7c172 3037 assert(SIGNAL_VALID(si.ssi_signo));
7057bd99 3038
fd38203a
LP
3039 read_one = true;
3040
92daebc0
LP
3041 if (e->signal_sources)
3042 s = e->signal_sources[si.ssi_signo];
92daebc0
LP
3043 if (!s)
3044 continue;
9da4cb2b
LP
3045 if (s->pending)
3046 continue;
fd38203a
LP
3047
3048 s->signal.siginfo = si;
9da4cb2b
LP
3049 d->current = s;
3050
fd38203a
LP
3051 r = source_set_pending(s, true);
3052 if (r < 0)
3053 return r;
9da4cb2b
LP
3054
3055 return 1;
fd38203a 3056 }
fd38203a
LP
3057}
3058
97ef5391
LP
3059static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3060 ssize_t n;
3061
3062 assert(e);
3063 assert(d);
3064
3065 assert_return(revents == EPOLLIN, -EIO);
3066
3067 /* If there's already an event source pending for this priority, don't read another */
3068 if (d->n_pending > 0)
3069 return 0;
3070
3071 /* Is the read buffer non-empty? If so, let's not read more */
3072 if (d->buffer_filled > 0)
3073 return 0;
3074
3075 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3076 if (n < 0) {
3077 if (IN_SET(errno, EAGAIN, EINTR))
3078 return 0;
3079
3080 return -errno;
3081 }
3082
3083 assert(n > 0);
3084 d->buffer_filled = (size_t) n;
3085 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3086
3087 return 1;
3088}
3089
3090static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3091 assert(e);
3092 assert(d);
3093 assert(sz <= d->buffer_filled);
3094
3095 if (sz == 0)
3096 return;
3097
3098 /* Move the rest to the buffer to the front, in order to get things properly aligned again */
3099 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3100 d->buffer_filled -= sz;
3101
3102 if (d->buffer_filled == 0)
3103 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3104}
3105
3106static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3107 int r;
3108
3109 assert(e);
3110 assert(d);
3111
3112 /* If there's already an event source pending for this priority, don't read another */
3113 if (d->n_pending > 0)
3114 return 0;
3115
3116 while (d->buffer_filled > 0) {
3117 size_t sz;
3118
3119 /* Let's validate that the event structures are complete */
3120 if (d->buffer_filled < offsetof(struct inotify_event, name))
3121 return -EIO;
3122
3123 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3124 if (d->buffer_filled < sz)
3125 return -EIO;
3126
3127 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3128 struct inode_data *inode_data;
97ef5391
LP
3129
3130 /* The queue overran, let's pass this event to all event sources connected to this inotify
3131 * object */
3132
90e74a66 3133 HASHMAP_FOREACH(inode_data, d->inodes) {
97ef5391
LP
3134 sd_event_source *s;
3135
3136 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3137
3138 if (s->enabled == SD_EVENT_OFF)
3139 continue;
3140
3141 r = source_set_pending(s, true);
3142 if (r < 0)
3143 return r;
3144 }
3145 }
3146 } else {
3147 struct inode_data *inode_data;
3148 sd_event_source *s;
3149
3150 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3151 * our watch descriptor table. */
3152 if (d->buffer.ev.mask & IN_IGNORED) {
3153
3154 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3155 if (!inode_data) {
3156 event_inotify_data_drop(e, d, sz);
3157 continue;
3158 }
3159
3160 /* The watch descriptor was removed by the kernel, let's drop it here too */
3161 inode_data->wd = -1;
3162 } else {
3163 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3164 if (!inode_data) {
3165 event_inotify_data_drop(e, d, sz);
3166 continue;
3167 }
3168 }
3169
3170 /* Trigger all event sources that are interested in these events. Also trigger all event
3171 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3172 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3173
3174 if (s->enabled == SD_EVENT_OFF)
3175 continue;
3176
3177 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3178 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3179 continue;
3180
3181 r = source_set_pending(s, true);
3182 if (r < 0)
3183 return r;
3184 }
3185 }
3186
3187 /* Something pending now? If so, let's finish, otherwise let's read more. */
3188 if (d->n_pending > 0)
3189 return 1;
3190 }
3191
3192 return 0;
3193}
3194
3195static int process_inotify(sd_event *e) {
3196 struct inotify_data *d;
3197 int r, done = 0;
3198
3199 assert(e);
3200
3201 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3202 r = event_inotify_data_process(e, d);
3203 if (r < 0)
3204 return r;
3205 if (r > 0)
3206 done ++;
3207 }
3208
3209 return done;
3210}
3211
fd38203a 3212static int source_dispatch(sd_event_source *s) {
b778cba4 3213 _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
8f5c235d 3214 EventSourceType saved_type;
fe8245eb 3215 int r = 0;
fd38203a
LP
3216
3217 assert(s);
6203e07a 3218 assert(s->pending || s->type == SOURCE_EXIT);
fd38203a 3219
b778cba4
LP
3220 /* Save the event source type, here, so that we still know it after the event callback which might
3221 * invalidate the event. */
8f5c235d
LP
3222 saved_type = s->type;
3223
b778cba4
LP
3224 /* Similar, store a reference to the event loop object, so that we can still access it after the
3225 * callback might have invalidated/disconnected the event source. */
3226 saved_event = sd_event_ref(s->event);
3227
945c2931 3228 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
da7e457c
LP
3229 r = source_set_pending(s, false);
3230 if (r < 0)
3231 return r;
3232 }
fd38203a 3233
6e9feda3
LP
3234 if (s->type != SOURCE_POST) {
3235 sd_event_source *z;
6e9feda3
LP
3236
3237 /* If we execute a non-post source, let's mark all
3238 * post sources as pending */
3239
90e74a66 3240 SET_FOREACH(z, s->event->post_sources) {
6e9feda3
LP
3241 if (z->enabled == SD_EVENT_OFF)
3242 continue;
3243
3244 r = source_set_pending(z, true);
3245 if (r < 0)
3246 return r;
3247 }
3248 }
3249
baf76283
LP
3250 if (s->enabled == SD_EVENT_ONESHOT) {
3251 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
fd38203a
LP
3252 if (r < 0)
3253 return r;
3254 }
3255
12179984 3256 s->dispatching = true;
b7484e2a 3257
fd38203a
LP
3258 switch (s->type) {
3259
3260 case SOURCE_IO:
3261 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3262 break;
3263
6a0f1f6d 3264 case SOURCE_TIME_REALTIME:
a8548816 3265 case SOURCE_TIME_BOOTTIME:
6a0f1f6d
LP
3266 case SOURCE_TIME_MONOTONIC:
3267 case SOURCE_TIME_REALTIME_ALARM:
3268 case SOURCE_TIME_BOOTTIME_ALARM:
fd38203a
LP
3269 r = s->time.callback(s, s->time.next, s->userdata);
3270 break;
3271
3272 case SOURCE_SIGNAL:
3273 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3274 break;
3275
08cd1552
LP
3276 case SOURCE_CHILD: {
3277 bool zombie;
3278
945c2931 3279 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3280
fd38203a 3281 r = s->child.callback(s, &s->child.siginfo, s->userdata);
08cd1552
LP
3282
3283 /* Now, reap the PID for good. */
f8f3f926 3284 if (zombie) {
cc59d290 3285 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
f8f3f926
LP
3286 s->child.waited = true;
3287 }
08cd1552 3288
fd38203a 3289 break;
08cd1552 3290 }
fd38203a
LP
3291
3292 case SOURCE_DEFER:
3293 r = s->defer.callback(s, s->userdata);
3294 break;
da7e457c 3295
6e9feda3
LP
3296 case SOURCE_POST:
3297 r = s->post.callback(s, s->userdata);
3298 break;
3299
6203e07a
LP
3300 case SOURCE_EXIT:
3301 r = s->exit.callback(s, s->userdata);
da7e457c 3302 break;
9d3e3aa5 3303
97ef5391
LP
3304 case SOURCE_INOTIFY: {
3305 struct sd_event *e = s->event;
3306 struct inotify_data *d;
3307 size_t sz;
3308
3309 assert(s->inotify.inode_data);
3310 assert_se(d = s->inotify.inode_data->inotify_data);
3311
3312 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3313 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3314 assert(d->buffer_filled >= sz);
3315
3316 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3317
3318 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3319 * buffer. */
3320 if (d->n_pending == 0)
3321 event_inotify_data_drop(e, d, sz);
3322
3323 break;
3324 }
3325
9d3e3aa5 3326 case SOURCE_WATCHDOG:
a71fe8b8 3327 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
9f2a50a3 3328 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
9d3e3aa5 3329 assert_not_reached("Wut? I shouldn't exist.");
fd38203a
LP
3330 }
3331
12179984
LP
3332 s->dispatching = false;
3333
b778cba4
LP
3334 if (r < 0) {
3335 log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
3336 strna(s->description),
3337 event_source_type_to_string(saved_type),
3338 s->exit_on_failure ? "exiting" : "disabling");
3339
3340 if (s->exit_on_failure)
3341 (void) sd_event_exit(saved_event, r);
3342 }
12179984
LP
3343
3344 if (s->n_ref == 0)
3345 source_free(s);
3346 else if (r < 0)
6203e07a 3347 sd_event_source_set_enabled(s, SD_EVENT_OFF);
b7484e2a 3348
6203e07a 3349 return 1;
fd38203a
LP
3350}
3351
3352static int event_prepare(sd_event *e) {
3353 int r;
3354
3355 assert(e);
3356
3357 for (;;) {
3358 sd_event_source *s;
3359
3360 s = prioq_peek(e->prepare);
baf76283 3361 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
fd38203a
LP
3362 break;
3363
3364 s->prepare_iteration = e->iteration;
3365 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3366 if (r < 0)
3367 return r;
3368
3369 assert(s->prepare);
12179984
LP
3370
3371 s->dispatching = true;
fd38203a 3372 r = s->prepare(s, s->userdata);
12179984
LP
3373 s->dispatching = false;
3374
b778cba4
LP
3375 if (r < 0) {
3376 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
3377 strna(s->description),
3378 event_source_type_to_string(s->type),
3379 s->exit_on_failure ? "exiting" : "disabling");
3380
3381 if (s->exit_on_failure)
3382 (void) sd_event_exit(e, r);
3383 }
fd38203a 3384
12179984
LP
3385 if (s->n_ref == 0)
3386 source_free(s);
3387 else if (r < 0)
3388 sd_event_source_set_enabled(s, SD_EVENT_OFF);
fd38203a
LP
3389 }
3390
3391 return 0;
3392}
3393
6203e07a 3394static int dispatch_exit(sd_event *e) {
da7e457c 3395 sd_event_source *p;
30dd293c 3396 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
da7e457c
LP
3397 int r;
3398
3399 assert(e);
3400
6203e07a 3401 p = prioq_peek(e->exit);
baf76283 3402 if (!p || p->enabled == SD_EVENT_OFF) {
da7e457c
LP
3403 e->state = SD_EVENT_FINISHED;
3404 return 0;
3405 }
3406
30dd293c 3407 ref = sd_event_ref(e);
da7e457c 3408 e->iteration++;
6203e07a 3409 e->state = SD_EVENT_EXITING;
da7e457c 3410 r = source_dispatch(p);
2b0c9ef7 3411 e->state = SD_EVENT_INITIAL;
da7e457c
LP
3412 return r;
3413}
3414
c2ba3ad6
LP
3415static sd_event_source* event_next_pending(sd_event *e) {
3416 sd_event_source *p;
3417
da7e457c
LP
3418 assert(e);
3419
c2ba3ad6
LP
3420 p = prioq_peek(e->pending);
3421 if (!p)
3422 return NULL;
3423
baf76283 3424 if (p->enabled == SD_EVENT_OFF)
c2ba3ad6
LP
3425 return NULL;
3426
3427 return p;
3428}
3429
cde93897
LP
3430static int arm_watchdog(sd_event *e) {
3431 struct itimerspec its = {};
3432 usec_t t;
cde93897
LP
3433
3434 assert(e);
3435 assert(e->watchdog_fd >= 0);
3436
3437 t = sleep_between(e,
3438 e->watchdog_last + (e->watchdog_period / 2),
3439 e->watchdog_last + (e->watchdog_period * 3 / 4));
3440
3441 timespec_store(&its.it_value, t);
3442
75145780
LP
3443 /* Make sure we never set the watchdog to 0, which tells the
3444 * kernel to disable it. */
3445 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3446 its.it_value.tv_nsec = 1;
3447
15c689d7 3448 if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
cde93897
LP
3449 return -errno;
3450
3451 return 0;
3452}
3453
3454static int process_watchdog(sd_event *e) {
3455 assert(e);
3456
3457 if (!e->watchdog)
3458 return 0;
3459
3460 /* Don't notify watchdog too often */
3461 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3462 return 0;
3463
3464 sd_notify(false, "WATCHDOG=1");
3465 e->watchdog_last = e->timestamp.monotonic;
3466
3467 return arm_watchdog(e);
3468}
3469
97ef5391
LP
3470static void event_close_inode_data_fds(sd_event *e) {
3471 struct inode_data *d;
3472
3473 assert(e);
3474
3475 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3476 * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
5238e957 3477 * adjustments to the even source, such as changing the priority (which requires us to remove and re-add a watch
97ef5391
LP
3478 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3479 * compromise. */
3480
3481 while ((d = e->inode_data_to_close)) {
3482 assert(d->fd >= 0);
3483 d->fd = safe_close(d->fd);
3484
3485 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3486 }
3487}
3488
c45a5a74
TG
3489_public_ int sd_event_prepare(sd_event *e) {
3490 int r;
fd38203a 3491
da7e457c 3492 assert_return(e, -EINVAL);
b937d761 3493 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
3494 assert_return(!event_pid_changed(e), -ECHILD);
3495 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3496 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3497
e5446015
LP
3498 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3499 * this check here once, since gettid() is typically not cached, and thus want to minimize
3500 * syscalls */
3501 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3502
6203e07a 3503 if (e->exit_requested)
c45a5a74 3504 goto pending;
fd38203a
LP
3505
3506 e->iteration++;
3507
0be6c2f6 3508 e->state = SD_EVENT_PREPARING;
fd38203a 3509 r = event_prepare(e);
0be6c2f6 3510 e->state = SD_EVENT_INITIAL;
fd38203a 3511 if (r < 0)
c45a5a74 3512 return r;
fd38203a 3513
6a0f1f6d
LP
3514 r = event_arm_timer(e, &e->realtime);
3515 if (r < 0)
c45a5a74 3516 return r;
6a0f1f6d 3517
a8548816
TG
3518 r = event_arm_timer(e, &e->boottime);
3519 if (r < 0)
c45a5a74 3520 return r;
a8548816 3521
6a0f1f6d
LP
3522 r = event_arm_timer(e, &e->monotonic);
3523 if (r < 0)
c45a5a74 3524 return r;
6a0f1f6d
LP
3525
3526 r = event_arm_timer(e, &e->realtime_alarm);
1b5995b0 3527 if (r < 0)
c45a5a74 3528 return r;
fd38203a 3529
6a0f1f6d 3530 r = event_arm_timer(e, &e->boottime_alarm);
1b5995b0 3531 if (r < 0)
c45a5a74 3532 return r;
fd38203a 3533
97ef5391
LP
3534 event_close_inode_data_fds(e);
3535
1b5995b0 3536 if (event_next_pending(e) || e->need_process_child)
c45a5a74
TG
3537 goto pending;
3538
2b0c9ef7 3539 e->state = SD_EVENT_ARMED;
c45a5a74
TG
3540
3541 return 0;
3542
3543pending:
2b0c9ef7 3544 e->state = SD_EVENT_ARMED;
6d148a84
TG
3545 r = sd_event_wait(e, 0);
3546 if (r == 0)
2b0c9ef7 3547 e->state = SD_EVENT_ARMED;
6d148a84
TG
3548
3549 return r;
c45a5a74
TG
3550}
3551
3552_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
5cddd924 3553 size_t event_queue_max;
c45a5a74
TG
3554 int r, m, i;
3555
3556 assert_return(e, -EINVAL);
b937d761 3557 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74
TG
3558 assert_return(!event_pid_changed(e), -ECHILD);
3559 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3560 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
c45a5a74
TG
3561
3562 if (e->exit_requested) {
3563 e->state = SD_EVENT_PENDING;
3564 return 1;
3565 }
6a0f1f6d 3566
5cddd924
LP
3567 event_queue_max = MAX(e->n_sources, 1u);
3568 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3569 return -ENOMEM;
fd38203a 3570
97ef5391
LP
3571 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3572 if (e->inotify_data_buffered)
3573 timeout = 0;
3574
5cddd924 3575 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
bab4820e 3576 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
da7e457c 3577 if (m < 0) {
c45a5a74
TG
3578 if (errno == EINTR) {
3579 e->state = SD_EVENT_PENDING;
3580 return 1;
3581 }
3582
3583 r = -errno;
da7e457c
LP
3584 goto finish;
3585 }
fd38203a 3586
e475d10c 3587 triple_timestamp_get(&e->timestamp);
fd38203a
LP
3588
3589 for (i = 0; i < m; i++) {
3590
5cddd924
LP
3591 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3592 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
9da4cb2b 3593 else {
5cddd924 3594 WakeupType *t = e->event_queue[i].data.ptr;
9da4cb2b
LP
3595
3596 switch (*t) {
3597
f8f3f926 3598 case WAKEUP_EVENT_SOURCE: {
5cddd924 3599 sd_event_source *s = e->event_queue[i].data.ptr;
f8f3f926
LP
3600
3601 assert(s);
3602
3603 switch (s->type) {
3604
3605 case SOURCE_IO:
5cddd924 3606 r = process_io(e, s, e->event_queue[i].events);
f8f3f926
LP
3607 break;
3608
3609 case SOURCE_CHILD:
5cddd924 3610 r = process_pidfd(e, s, e->event_queue[i].events);
f8f3f926
LP
3611 break;
3612
3613 default:
3614 assert_not_reached("Unexpected event source type");
3615 }
3616
9da4cb2b 3617 break;
f8f3f926 3618 }
fd38203a 3619
9da4cb2b 3620 case WAKEUP_CLOCK_DATA: {
5cddd924 3621 struct clock_data *d = e->event_queue[i].data.ptr;
f8f3f926
LP
3622
3623 assert(d);
3624
5cddd924 3625 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
9da4cb2b
LP
3626 break;
3627 }
3628
3629 case WAKEUP_SIGNAL_DATA:
5cddd924 3630 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
9da4cb2b
LP
3631 break;
3632
97ef5391 3633 case WAKEUP_INOTIFY_DATA:
5cddd924 3634 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
97ef5391
LP
3635 break;
3636
9da4cb2b
LP
3637 default:
3638 assert_not_reached("Invalid wake-up pointer");
3639 }
3640 }
fd38203a 3641 if (r < 0)
da7e457c 3642 goto finish;
fd38203a
LP
3643 }
3644
cde93897
LP
3645 r = process_watchdog(e);
3646 if (r < 0)
3647 goto finish;
3648
6a0f1f6d
LP
3649 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3650 if (r < 0)
3651 goto finish;
3652
e475d10c 3653 r = process_timer(e, e->timestamp.boottime, &e->boottime);
a8548816
TG
3654 if (r < 0)
3655 goto finish;
3656
6a0f1f6d
LP
3657 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3658 if (r < 0)
3659 goto finish;
3660
3661 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
fd38203a 3662 if (r < 0)
da7e457c 3663 goto finish;
fd38203a 3664
e475d10c 3665 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
fd38203a 3666 if (r < 0)
da7e457c 3667 goto finish;
fd38203a 3668
c2ba3ad6 3669 if (e->need_process_child) {
fd38203a
LP
3670 r = process_child(e);
3671 if (r < 0)
da7e457c 3672 goto finish;
fd38203a
LP
3673 }
3674
97ef5391
LP
3675 r = process_inotify(e);
3676 if (r < 0)
3677 goto finish;
3678
c45a5a74
TG
3679 if (event_next_pending(e)) {
3680 e->state = SD_EVENT_PENDING;
3681
3682 return 1;
da7e457c
LP
3683 }
3684
c45a5a74 3685 r = 0;
fd38203a 3686
da7e457c 3687finish:
2b0c9ef7 3688 e->state = SD_EVENT_INITIAL;
da7e457c
LP
3689
3690 return r;
fd38203a
LP
3691}
3692
c45a5a74
TG
3693_public_ int sd_event_dispatch(sd_event *e) {
3694 sd_event_source *p;
3695 int r;
3696
3697 assert_return(e, -EINVAL);
b937d761 3698 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74
TG
3699 assert_return(!event_pid_changed(e), -ECHILD);
3700 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3701 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3702
3703 if (e->exit_requested)
3704 return dispatch_exit(e);
3705
3706 p = event_next_pending(e);
3707 if (p) {
30dd293c 3708 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
c45a5a74 3709
30dd293c 3710 ref = sd_event_ref(e);
c45a5a74
TG
3711 e->state = SD_EVENT_RUNNING;
3712 r = source_dispatch(p);
2b0c9ef7 3713 e->state = SD_EVENT_INITIAL;
c45a5a74
TG
3714 return r;
3715 }
3716
2b0c9ef7 3717 e->state = SD_EVENT_INITIAL;
c45a5a74
TG
3718
3719 return 1;
3720}
3721
34b87517 3722static void event_log_delays(sd_event *e) {
442ac269
YW
3723 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3724 size_t l, i;
34b87517 3725
442ac269
YW
3726 p = b;
3727 l = sizeof(b);
3728 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3729 l = strpcpyf(&p, l, "%u ", e->delays[i]);
34b87517
VC
3730 e->delays[i] = 0;
3731 }
442ac269 3732 log_debug("Event loop iterations: %s", b);
34b87517
VC
3733}
3734
c45a5a74
TG
3735_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3736 int r;
3737
3738 assert_return(e, -EINVAL);
b937d761 3739 assert_return(e = event_resolve(e), -ENOPKG);
c45a5a74
TG
3740 assert_return(!event_pid_changed(e), -ECHILD);
3741 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3742 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
c45a5a74 3743
34b87517
VC
3744 if (e->profile_delays && e->last_run) {
3745 usec_t this_run;
3746 unsigned l;
3747
3748 this_run = now(CLOCK_MONOTONIC);
3749
3750 l = u64log2(this_run - e->last_run);
3751 assert(l < sizeof(e->delays));
3752 e->delays[l]++;
3753
3754 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3755 event_log_delays(e);
3756 e->last_log = this_run;
3757 }
3758 }
3759
c45a5a74 3760 r = sd_event_prepare(e);
53bac4e0
LP
3761 if (r == 0)
3762 /* There was nothing? Then wait... */
3763 r = sd_event_wait(e, timeout);
c45a5a74 3764
34b87517
VC
3765 if (e->profile_delays)
3766 e->last_run = now(CLOCK_MONOTONIC);
3767
02d30981 3768 if (r > 0) {
53bac4e0 3769 /* There's something now, then let's dispatch it */
02d30981
TG
3770 r = sd_event_dispatch(e);
3771 if (r < 0)
3772 return r;
53bac4e0
LP
3773
3774 return 1;
3775 }
3776
3777 return r;
c45a5a74
TG
3778}
3779
f7262a9f 3780_public_ int sd_event_loop(sd_event *e) {
30dd293c 3781 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
fd38203a
LP
3782 int r;
3783
da7e457c 3784 assert_return(e, -EINVAL);
b937d761 3785 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 3786 assert_return(!event_pid_changed(e), -ECHILD);
2b0c9ef7 3787 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3788
30dd293c 3789 ref = sd_event_ref(e);
fd38203a 3790
da7e457c 3791 while (e->state != SD_EVENT_FINISHED) {
fd38203a
LP
3792 r = sd_event_run(e, (uint64_t) -1);
3793 if (r < 0)
30dd293c 3794 return r;
fd38203a
LP
3795 }
3796
30dd293c 3797 return e->exit_code;
fd38203a
LP
3798}
3799
9b364545 3800_public_ int sd_event_get_fd(sd_event *e) {
9b364545 3801 assert_return(e, -EINVAL);
b937d761 3802 assert_return(e = event_resolve(e), -ENOPKG);
9b364545
TG
3803 assert_return(!event_pid_changed(e), -ECHILD);
3804
3805 return e->epoll_fd;
3806}
3807
f7262a9f 3808_public_ int sd_event_get_state(sd_event *e) {
da7e457c 3809 assert_return(e, -EINVAL);
b937d761 3810 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
3811 assert_return(!event_pid_changed(e), -ECHILD);
3812
3813 return e->state;
3814}
3815
6203e07a 3816_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
da7e457c 3817 assert_return(e, -EINVAL);
b937d761 3818 assert_return(e = event_resolve(e), -ENOPKG);
6203e07a 3819 assert_return(code, -EINVAL);
da7e457c 3820 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 3821
6203e07a
LP
3822 if (!e->exit_requested)
3823 return -ENODATA;
3824
3825 *code = e->exit_code;
3826 return 0;
fd38203a
LP
3827}
3828
6203e07a 3829_public_ int sd_event_exit(sd_event *e, int code) {
da7e457c 3830 assert_return(e, -EINVAL);
b937d761 3831 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c
LP
3832 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3833 assert_return(!event_pid_changed(e), -ECHILD);
fd38203a 3834
6203e07a
LP
3835 e->exit_requested = true;
3836 e->exit_code = code;
3837
fd38203a
LP
3838 return 0;
3839}
46e8c825 3840
6a0f1f6d 3841_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
46e8c825 3842 assert_return(e, -EINVAL);
b937d761 3843 assert_return(e = event_resolve(e), -ENOPKG);
46e8c825 3844 assert_return(usec, -EINVAL);
46e8c825
LP
3845 assert_return(!event_pid_changed(e), -ECHILD);
3846
e475d10c
LP
3847 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3848 return -EOPNOTSUPP;
3849
3850 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
3851 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3852 * the purpose of getting the time this doesn't matter. */
3411372e
LP
3853 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3854 return -EOPNOTSUPP;
3855
e475d10c 3856 if (!triple_timestamp_is_set(&e->timestamp)) {
15c689d7 3857 /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
38a03f06
LP
3858 *usec = now(clock);
3859 return 1;
3860 }
46e8c825 3861
e475d10c 3862 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
46e8c825
LP
3863 return 0;
3864}
afc6adb5
LP
3865
3866_public_ int sd_event_default(sd_event **ret) {
39883f62 3867 sd_event *e = NULL;
afc6adb5
LP
3868 int r;
3869
3870 if (!ret)
3871 return !!default_event;
3872
3873 if (default_event) {
3874 *ret = sd_event_ref(default_event);
3875 return 0;
3876 }
3877
3878 r = sd_event_new(&e);
3879 if (r < 0)
3880 return r;
3881
3882 e->default_event_ptr = &default_event;
3883 e->tid = gettid();
3884 default_event = e;
3885
3886 *ret = e;
3887 return 1;
3888}
3889
3890_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3891 assert_return(e, -EINVAL);
b937d761 3892 assert_return(e = event_resolve(e), -ENOPKG);
afc6adb5 3893 assert_return(tid, -EINVAL);
76b54375 3894 assert_return(!event_pid_changed(e), -ECHILD);
afc6adb5 3895
76b54375
LP
3896 if (e->tid != 0) {
3897 *tid = e->tid;
3898 return 0;
3899 }
3900
3901 return -ENXIO;
afc6adb5 3902}
cde93897
LP
3903
3904_public_ int sd_event_set_watchdog(sd_event *e, int b) {
3905 int r;
3906
3907 assert_return(e, -EINVAL);
b937d761 3908 assert_return(e = event_resolve(e), -ENOPKG);
8f726607 3909 assert_return(!event_pid_changed(e), -ECHILD);
cde93897
LP
3910
3911 if (e->watchdog == !!b)
3912 return e->watchdog;
3913
3914 if (b) {
09812eb7
LP
3915 r = sd_watchdog_enabled(false, &e->watchdog_period);
3916 if (r <= 0)
cde93897 3917 return r;
cde93897
LP
3918
3919 /* Issue first ping immediately */
3920 sd_notify(false, "WATCHDOG=1");
3921 e->watchdog_last = now(CLOCK_MONOTONIC);
3922
3923 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3924 if (e->watchdog_fd < 0)
3925 return -errno;
3926
3927 r = arm_watchdog(e);
3928 if (r < 0)
3929 goto fail;
3930
1eac7948 3931 struct epoll_event ev = {
a82f89aa
LP
3932 .events = EPOLLIN,
3933 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3934 };
cde93897 3935
15c689d7 3936 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
cde93897
LP
3937 r = -errno;
3938 goto fail;
3939 }
3940
3941 } else {
3942 if (e->watchdog_fd >= 0) {
5a795bff 3943 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
03e334a1 3944 e->watchdog_fd = safe_close(e->watchdog_fd);
cde93897
LP
3945 }
3946 }
3947
3948 e->watchdog = !!b;
3949 return e->watchdog;
3950
3951fail:
03e334a1 3952 e->watchdog_fd = safe_close(e->watchdog_fd);
cde93897
LP
3953 return r;
3954}
8f726607
LP
3955
3956_public_ int sd_event_get_watchdog(sd_event *e) {
3957 assert_return(e, -EINVAL);
b937d761 3958 assert_return(e = event_resolve(e), -ENOPKG);
8f726607
LP
3959 assert_return(!event_pid_changed(e), -ECHILD);
3960
3961 return e->watchdog;
3962}
60a3b1e1
LP
3963
3964_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3965 assert_return(e, -EINVAL);
b937d761 3966 assert_return(e = event_resolve(e), -ENOPKG);
60a3b1e1
LP
3967 assert_return(!event_pid_changed(e), -ECHILD);
3968
3969 *ret = e->iteration;
3970 return 0;
3971}
15723a1d
LP
3972
3973_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3974 assert_return(s, -EINVAL);
3975
3976 s->destroy_callback = callback;
3977 return 0;
3978}
3979
3980_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3981 assert_return(s, -EINVAL);
3982
3983 if (ret)
3984 *ret = s->destroy_callback;
3985
3986 return !!s->destroy_callback;
3987}
2382c936
YW
3988
3989_public_ int sd_event_source_get_floating(sd_event_source *s) {
3990 assert_return(s, -EINVAL);
3991
3992 return s->floating;
3993}
3994
3995_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3996 assert_return(s, -EINVAL);
3997
3998 if (s->floating == !!b)
3999 return 0;
4000
4001 if (!s->event) /* Already disconnected */
4002 return -ESTALE;
4003
4004 s->floating = b;
4005
4006 if (b) {
4007 sd_event_source_ref(s);
4008 sd_event_unref(s->event);
4009 } else {
4010 sd_event_ref(s->event);
4011 sd_event_source_unref(s);
4012 }
4013
4014 return 1;
4015}
b778cba4
LP
4016
4017_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
4018 assert_return(s, -EINVAL);
4019 assert_return(s->type != SOURCE_EXIT, -EDOM);
4020
4021 return s->exit_on_failure;
4022}
4023
4024_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
4025 assert_return(s, -EINVAL);
4026 assert_return(s->type != SOURCE_EXIT, -EDOM);
4027
4028 if (s->exit_on_failure == !!b)
4029 return 0;
4030
4031 s->exit_on_failure = b;
4032 return 1;
4033}