/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;
        size_t event_queue_allocated;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

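/* Illustrative note (not upstream code): the earliest/latest prioq pair is what enables timer
 * coalescing. A timer armed for time T with accuracy A may be dispatched anywhere in [T, T+A];
 * "earliest" orders sources by T, "latest" by T+A. When arming the timerfd, the loop can pick a
 * single wakeup point that is at or after the smallest T while staying below the smallest T+A,
 * serving several timers with one wakeup. For example, timers (T=10ms, A=250ms) and (T=200ms,
 * A=250ms) can both be satisfied by a single wakeup at 200ms. */
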
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

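/* Example (illustrative sketch, not part of the library): minimal caller-side lifecycle for an
 * event loop created with sd_event_new(). Error handling is abbreviated.
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *
 *     // ... attach sources with sd_event_add_io()/sd_event_add_time()/... ...
 *
 *     r = sd_event_loop(e);   // dispatches until sd_event_exit() is called
 *
 *     sd_event_unref(e);
 *     return r;
 */
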
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd,
                      &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        int r;

        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (s->child.registered)
                        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
                else
                        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
                if (r < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * as a result, the object is removed as well. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is now empty we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);
        assert(EVENT_SOURCE_IS_TIME(s->type));

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state. Makes sure the two prioq's are ordered properly again. */
        assert_se(d = event_get_clock_data(s->event, s->type));
        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

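/* Example (illustrative sketch): watching a socket for input. "my_io_handler" is a hypothetical
 * callback; returning a negative errno from a handler aborts the loop.
 *
 *     static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             // read from fd here; revents contains the triggering EPOLL* flags
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, my_io_handler, NULL);
 */
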
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

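/* Illustrative note (an assumption about the arming logic elsewhere in this file, sketched rather
 * than quoted): when a wakeup is realigned to a coarse granularity m (a minute, second or 250ms),
 * the candidate wakeup is roughly
 *
 *     c = (b / m) * m + e->perturb % m;      // b = latest acceptable wakeup time
 *
 * so all timers on one machine realign to the same boot-ID-derived phase, while machines with
 * synced clocks but different boot IDs wake at different phases. */
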
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

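/* Example (illustrative sketch): a one-shot timer firing ~5s from now on CLOCK_MONOTONIC, with
 * 100ms accuracy so it may be coalesced with nearby wakeups. "on_timer" is a hypothetical
 * sd_event_time_handler_t.
 *
 *     r = sd_event_add_time_relative(e, &source, CLOCK_MONOTONIC,
 *                                    5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                                    on_timer, NULL);
 */
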
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

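/* Example (illustrative sketch): the signal must be blocked by the caller before the source is
 * added, otherwise -EBUSY is returned (see the signal_is_blocked() check above). A NULL callback
 * makes the signal terminate the loop via signal_exit_callback().
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGTERM) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */
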
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything other than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */

                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }
        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */

                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }

                e->need_process_child = true;
        }

        if (ret)
                *ret = s;

        TAKE_PTR(s);
        return 0;
}

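/* Example (illustrative sketch): watching a forked-off child for exit. SIGCHLD must be blocked
 * first, which is verified above when the first child source is created. "on_child" is a
 * hypothetical sd_event_child_handler_t; sigprocmask_many() is systemd's internal helper from
 * signal-util.h.
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *     r = sd_event_add_child(e, &source, pid, WEXITED, on_child, NULL);
 */
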
_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */

                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }
        } else {
                /* We shall wait for some other event than WEXITED */

                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0) {
                        e->n_enabled_child_sources--;
                        return r;
                }

                e->need_process_child = true;
        }

        if (ret)
                *ret = s;

        TAKE_PTR(s);
        return 0;
}

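/* Example (illustrative sketch): like sd_event_add_child(), but starting from a pidfd the caller
 * already holds. Since pidfd_owned defaults to false here, the fd stays open after the source is
 * freed unless ownership is transferred with sd_event_source_set_child_pidfd_own().
 *
 *     r = sd_event_add_child_pidfd(e, &source, pidfd, WEXITED, on_child, NULL);
 */
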
static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

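/* Example (illustrative sketch): defer sources are created enabled SD_EVENT_ONESHOT and already
 * marked pending, so the callback runs on the next loop iteration and the source then drops to
 * SD_EVENT_OFF until re-enabled. "on_deferred" is hypothetical.
 *
 *     r = sd_event_add_defer(e, NULL, on_deferred, NULL);
 */
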
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

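/* Example (illustrative sketch): exit sources are dispatched in priority order once
 * sd_event_exit() has been called, which makes them a natural place for teardown work.
 * "on_shutdown" is hypothetical.
 *
 *     r = sd_event_add_exit(e, NULL, on_shutdown, NULL);
 */
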
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot.
         * That's because we cannot change the mask anymore after the event source was created once, since the kernel
         * has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and
         * suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd; /* inotify_add_watch_fd() returns a negative errno on failure, propagate it directly */

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

97ef5391
LP
1877_public_ int sd_event_add_inotify(
1878 sd_event *e,
1879 sd_event_source **ret,
1880 const char *path,
1881 uint32_t mask,
1882 sd_event_inotify_handler_t callback,
1883 void *userdata) {
1884
97ef5391
LP
1885 struct inotify_data *inotify_data = NULL;
1886 struct inode_data *inode_data = NULL;
1887 _cleanup_close_ int fd = -1;
8c75fe17 1888 _cleanup_(source_freep) sd_event_source *s = NULL;
97ef5391
LP
1889 struct stat st;
1890 int r;
1891
1892 assert_return(e, -EINVAL);
1893 assert_return(e = event_resolve(e), -ENOPKG);
1894 assert_return(path, -EINVAL);
97ef5391
LP
1895 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1896 assert_return(!event_pid_changed(e), -ECHILD);
1897
b9350e70
LP
1898 if (!callback)
1899 callback = inotify_exit_callback;
1900
97ef5391
LP
1901 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1902 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1903 * the user can't use them for us. */
1904 if (mask & IN_MASK_ADD)
1905 return -EINVAL;
1906
1907 fd = open(path, O_PATH|O_CLOEXEC|
1908 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1909 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1910 if (fd < 0)
1911 return -errno;
1912
1913 if (fstat(fd, &st) < 0)
1914 return -errno;
1915
1916 s = source_new(e, !ret, SOURCE_INOTIFY);
1917 if (!s)
1918 return -ENOMEM;
1919
1920 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1921 s->inotify.mask = mask;
1922 s->inotify.callback = callback;
1923 s->userdata = userdata;
1924
1925 /* Allocate an inotify object for this priority, and an inode object within it */
1926 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1927 if (r < 0)
8c75fe17 1928 return r;
97ef5391
LP
1929
1930 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
8c75fe17
ZJS
1931 if (r < 0) {
1932 event_free_inotify_data(e, inotify_data);
1933 return r;
1934 }
97ef5391
LP
1935
1936 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1937 * the event source, until then, for which we need the original inode. */
1938 if (inode_data->fd < 0) {
1939 inode_data->fd = TAKE_FD(fd);
1940 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1941 }
1942
1943 /* Link our event source to the inode data object */
1944 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1945 s->inotify.inode_data = inode_data;
1946
97ef5391
LP
1947 /* Actually realize the watch now */
1948 r = inode_data_realize_watch(e, inode_data);
1949 if (r < 0)
8c75fe17 1950 return r;
97ef5391
LP
1951
1952 (void) sd_event_source_set_description(s, path);
1953
1954 if (ret)
1955 *ret = s;
8c75fe17 1956 TAKE_PTR(s);
97ef5391
LP
1957
1958 return 0;
97ef5391
LP
1959}
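
/* Illustrative sketch (not compiled in): typical client-side use of
 * sd_event_add_inotify() via the public API; "on_change" and the watched path
 * are hypothetical. */
#if 0
#include <stdio.h>
#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int on_change(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        printf("mask=%#x name=%s\n", ev->mask, ev->len > 0 ? ev->name : "");
        return 0;
}

int main(void) {
        sd_event *e = NULL;

        if (sd_event_default(&e) < 0)
                return 1;

        /* Passing a NULL callback instead would make the loop exit on the first
         * event, via inotify_exit_callback() above. */
        if (sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_DELETE, on_change, NULL) < 0)
                return 1;

        sd_event_loop(e);
        sd_event_unref(e);
        return 0;
}
#endif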

static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
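
/* Illustrative sketch (not compiled in): a socket IO source that starts out
 * read-only and later also waits for writability once output is queued;
 * "on_io", "setup_socket_source" and "sockfd" are hypothetical. */
#if 0
static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        if (revents & EPOLLIN) {
                /* drain fd ... */
        }
        if (revents & EPOLLOUT) {
                /* flush pending output, then stop watching for writability: */
                (void) sd_event_source_set_io_events(s, EPOLLIN);
        }
        return 0;
}

static int setup_socket_source(sd_event *e, int sockfd) {
        sd_event_source *io = NULL;
        int r;

        r = sd_event_add_io(e, &io, sockfd, EPOLLIN, on_io, NULL);
        if (r < 0)
                return r;

        /* Once output has been queued elsewhere, additionally wait for writability: */
        return sd_event_source_set_io_events(io, EPOLLIN|EPOLLOUT);
}
#endif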

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it, we can't change the
                 * priority anymore. Note that we close any fds when entering the next event loop iteration, i.e.
                 * for inotify events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
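
/* Illustrative sketch (not compiled in): priorities order dispatching when
 * several sources are pending in the same iteration; smaller values run
 * first. "urgent" and "lazy" are hypothetical sources created earlier. */
#if 0
assert_se(sd_event_source_set_priority(urgent, SD_EVENT_PRIORITY_IMPORTANT) >= 0);
assert_se(sd_event_source_set_priority(lazy, SD_EVENT_PRIORITY_IDLE) >= 0);
/* For inotify sources this only succeeds until the first event loop iteration
 * after creation, as the code above explains (-EOPNOTSUPP afterwards). */
#endif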

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}

static int event_source_disable(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->enabled != SD_EVENT_OFF);

        /* Unset the pending flag when this event source is disabled */
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        s->enabled = SD_EVENT_OFF;

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_SIGNAL:
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                break;

        case SOURCE_CHILD:
                assert(s->event->n_enabled_child_sources > 0);
                s->event->n_enabled_child_sources--;

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        return 0;
}

static int event_source_enable(sd_event_source *s, int m) {
        int r;

        assert(s);
        assert(IN_SET(m, SD_EVENT_ON, SD_EVENT_ONESHOT));
        assert(s->enabled == SD_EVENT_OFF);

        /* Unset the pending flag when this event source is enabled */
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        s->enabled = m;

        switch (s->type) {

        case SOURCE_IO:
                r = source_io_register(s, m, s->io.events);
                if (r < 0) {
                        s->enabled = SD_EVENT_OFF;
                        return r;
                }

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_SIGNAL:
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
                if (r < 0) {
                        s->enabled = SD_EVENT_OFF;
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        return r;
                }

                break;

        case SOURCE_CHILD:
                s->event->n_enabled_child_sources++;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we have a pidfd */

                        r = source_child_pidfd_register(s, s->enabled);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                return r;
                        }
                } else {
                        /* no pidfd, or something other than WEXITED to watch for */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                }

                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        return 0;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */
                return 0;

        if (m == SD_EVENT_OFF)
                r = event_source_disable(s);
        else {
                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
                         * event source is already enabled after all. */
                        s->enabled = m;
                        return 0;
                }

                r = event_source_enable(s, m);
        }
        if (r < 0)
                return r;

        event_source_pp_prioq_reshuffle(s);
        return 0;
}
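
/* Illustrative sketch (not compiled in): pausing and resuming a source; note
 * that the shortcut above means flipping between SD_EVENT_ON and
 * SD_EVENT_ONESHOT never re-registers anything with the kernel. "io" is a
 * hypothetical source. */
#if 0
assert_se(sd_event_source_set_enabled(io, SD_EVENT_OFF) >= 0);     /* pause */
assert_se(sd_event_source_set_enabled(io, SD_EVENT_ON) >= 0);      /* resume */
assert_se(sd_event_source_set_enabled(io, SD_EVENT_ONESHOT) >= 0); /* shortcut path */
#endif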

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, t + usec);
}
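
/* Illustrative sketch (not compiled in): a repeating 1s tick built from the
 * oneshot default of timer sources, re-armed relative to the loop's cached
 * "now"; "on_tick" and "setup_tick" are hypothetical. */
#if 0
static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        /* ... periodic work ... */

        (void) sd_event_source_set_time_relative(s, USEC_PER_SEC);
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}

static int setup_tick(sd_event *e) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        /* Timer sources are created enabled as SD_EVENT_ONESHOT. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + USEC_PER_SEC, 0, on_tick, NULL);
}
#endif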

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}

_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we have already seen an indication that the process exited, refuse to send a signal early. This
         * way we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's fall back to the methods below only if the system call is not implemented or
                         * prohibited; propagate any other error */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * this here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if a siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}
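
/* Illustrative sketch (not compiled in): watching a forked child, then asking
 * it to terminate via the pidfd-backed helper, which is immune to PID reuse
 * where pidfds are available; "on_child" and "watch_and_stop" are hypothetical. */
#if 0
static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* si->si_code is CLD_EXITED/CLD_KILLED/..., si->si_status the exit code or signal */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int watch_and_stop(sd_event *e, pid_t pid) {
        sd_event_source *child = NULL;
        int r;

        r = sd_event_add_child(e, &child, pid, WEXITED, on_child, NULL);
        if (r < 0)
                return r;

        /* Later, ask the child to shut down: */
        return sd_event_source_send_child_signal(child, SIGTERM, NULL, 0);
}
#endif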

_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
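
/* Illustrative sketch (not compiled in): a prepare callback runs just before
 * each poll and is a good place to arm a defer source only when buffered work
 * exists; "my_buffer_nonempty", "io" and "defer" are hypothetical. */
#if 0
static int on_prepare(sd_event_source *s, void *userdata) {
        sd_event_source *defer = userdata;

        /* Only wake up for the defer source if there is actually work queued: */
        return sd_event_source_set_enabled(defer, my_buffer_nonempty() ? SD_EVENT_ON : SD_EVENT_OFF);
}

/* elsewhere, after creating "io" with the defer source as userdata: */
assert_se(sd_event_source_set_prepare(io, on_prepare) >= 0);
#endif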

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms steps. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
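
/* Worked example (illustrative, assuming e->perturb == 7 * USEC_PER_SEC): for
 * a = 23s and b = 55s past some minute boundary, the minute-granularity
 * candidate lands at :07s, before a, so it is rejected; the 10s-granularity
 * candidate is 50s + (7s % 10s) = 57s, which is >= b, so 10s is subtracted,
 * giving 47s, which lies within [a, b] and is returned. All loops sharing the
 * same boot ID thus wake at the same 7s offset within each 10s window. */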

static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                event_source_time_prioq_reshuffle(s);
        }

        return 0;
}

static int process_child(sd_event *e) {
        sd_event_source *s;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* If there's a usable pidfd known for this event source, don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
                if (waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}

static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (s->enabled == SD_EVENT_OFF)
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority; if SIGCHLD is enqueued after
           the one we dequeue we wouldn't notice, but we might have
           higher-priority children we care about, hence we need to
           check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}

static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}

static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = sd_event_ref(s->event);

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and we thus want to minimize
         * syscalls */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        size_t event_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        event_queue_max = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
                return -ENOMEM;

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached("Unexpected event source type");
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
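
/* Illustrative sketch (not compiled in): driving the loop stages by hand, e.g.
 * when embedding sd-event in a foreign event loop; sd_event_run() below is
 * essentially this sequence. */
#if 0
for (;;) {
        r = sd_event_prepare(e);            /* > 0: something already pending */
        if (r < 0)
                break;
        if (r == 0) {
                r = sd_event_wait(e, (uint64_t) -1);
                if (r < 0)
                        break;
                if (r == 0)                 /* nothing became pending, go around */
                        continue;
        }

        r = sd_event_dispatch(e);           /* returns 0 once the loop finished */
        if (r <= 0)
                break;
}
#endif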

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
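
/* Usage sketch (illustrative): embedding the loop into a foreign poll loop by watching
 * this fd for POLLIN, then letting sd-event dispatch without blocking:
 *
 *         struct pollfd pfd = {
 *                 .fd = sd_event_get_fd(e),
 *                 .events = POLLIN,
 *         };
 *
 *         if (poll(&pfd, 1, -1) > 0 && (pfd.revents & POLLIN))
 *                 (void) sd_event_run(e, 0); // dispatch whatever is ready, don't wait
 */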

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
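
/* Usage sketch (illustrative): the cached timestamp is the natural base for relative
 * timers, since it is stable across all handlers of one iteration. on_timer() is a
 * hypothetical sd_event_time_handler_t:
 *
 *         uint64_t usec;
 *
 *         r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                               usec + 5 * USEC_PER_SEC, // fire in 5s
 *                               0,                       // default accuracy
 *                               on_timer, NULL);
 */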

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
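
/* Usage sketch (illustrative): a service running under a service manager with
 * WatchdogSec= set only needs to opt in; the loop then sends the periodic
 * WATCHDOG=1 notifications by itself:
 *
 *         r = sd_event_set_watchdog(e, true);
 *         if (r < 0)
 *                 return r;
 *         if (r == 0)
 *                 ...   // no watchdog requested via $WATCHDOG_USEC, nothing enabled
 */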

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(ret, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
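
/* Usage sketch (illustrative): binding heap-allocated userdata to the source's
 * lifetime, so it is released whenever the source goes away. "struct ctx" and
 * on_defer() are hypothetical:
 *
 *         struct ctx *c = new0(struct ctx, 1);
 *         if (!c)
 *                 return -ENOMEM;
 *
 *         r = sd_event_add_defer(e, &s, on_defer, c);
 *         if (r < 0) {
 *                 free(c);
 *                 return r;
 *         }
 *
 *         (void) sd_event_source_set_destroy_callback(s, free); // frees c with the source
 */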

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
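
/* Usage sketch (illustrative): handing a source over to the event loop, so that its
 * lifetime is bound to the loop instead of our reference:
 *
 *         r = sd_event_source_set_floating(s, true);
 *         if (r < 0)
 *                 return r;
 *
 *         s = sd_event_source_unref(s); // the loop keeps the source alive from here on
 */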

_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}
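
/* Usage sketch (illustrative): marking a source as critical, so that a failing
 * handler takes the whole loop down with its error instead of merely having the
 * source disabled:
 *
 *         r = sd_event_source_set_exit_on_failure(s, true);
 *         if (r < 0)
 *                 return r;
 */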