/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We can add support for more clocks
         * when the kernel learns to deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;
        size_t event_queue_allocated;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop preparing as soon as we hit one
         * that has already been prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

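/* Illustrative example (hypothetical numbers, not part of the original source): a source with
 * .next = 1000ms and .accuracy = 250ms may be dispatched anywhere within [1000ms, 1250ms]. The
 * "earliest" prioq above orders sources by the start of that window, while the "latest" prioq
 * below orders them by its end; together they let the loop pick a single timerfd wakeup that
 * satisfies several timers at once. */
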
static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

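/* Illustrative usage sketch (example only, not part of upstream sd-event.c; error handling is
 * abbreviated and all example_* names are hypothetical): the minimal lifecycle of an event loop
 * allocated with sd_event_new() above. */
#if 0
static int example_run_loop(void) {
        /* hypothetical example, disabled */
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        /* ... attach sources with sd_event_add_io(), sd_event_add_time(), ... */

        r = sd_event_loop(e); /* dispatches sources until sd_event_exit() is called */
        sd_event_unref(e);
        return r;
}
#endif
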
DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data object. If the signal mask of the
         * object becomes empty that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);
        assert(EVENT_SOURCE_IS_TIME(s->type));

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state. Makes sure the two prioq's are ordered properly again. */
        assert_se(d = event_get_clock_data(s->event, s->type));
        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
                         * continued to being watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this
                 * (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

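/* Illustrative usage sketch (example only; my_io_handler and sock_fd are hypothetical names):
 * registering an I/O source for a readable socket. Passing NULL for ret makes the source
 * "floating", i.e. its lifetime is bound to the event loop itself (see the !ret argument passed
 * to source_new() above). */
#if 0
static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* hypothetical example handler */
        if (revents & EPOLLIN) {
                /* read from fd here */
        }
        return 0;
}

static int example_watch_socket(sd_event *e, int sock_fd) {
        return sd_event_add_io(e, NULL, sock_fd, EPOLLIN, my_io_handler, NULL);
}
#endif
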
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to the same time within each
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
         * wakeup. However, let's take some system-specific randomness for this value, so that in a
         * network of systems with synced clocks timer events are distributed a bit. Here, we calculate a
         * perturbation usec offset from the boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

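/* Illustrative example (hypothetical value): if this machine's boot ID hashes to a perturb
 * offset of 13.7s, a timer that may fire anywhere within a given minute is preferentially
 * scheduled at hh:mm:13.7. All such timers on this machine then coalesce onto the same wakeup,
 * while a machine with a different boot ID wakes at a different offset, so that fleets with
 * synced clocks don't all wake up at the same instant. */
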
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

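/* Illustrative usage sketch (example only; my_time_handler is a hypothetical name): arming a
 * one-shot timer 5s from now on CLOCK_MONOTONIC with 100ms accuracy. sd_event_add_time() takes
 * an absolute trigger time; sd_event_add_time_relative() below wraps exactly this
 * now-plus-offset pattern, including the overflow check. */
#if 0
static int my_time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        /* hypothetical example handler; usec is the absolute trigger time configured above */
        return 0;
}

static int example_arm_timer(sd_event *e) {
        usec_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 usec_add(now_usec, 5 * USEC_PER_SEC),
                                 100 * USEC_PER_MSEC, my_time_handler, NULL);
}
#endif
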
_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

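/* Illustrative usage sketch (example only; example_watch_sigterm is a hypothetical name):
 * watching SIGTERM. The signal must already be blocked in all threads, otherwise -EBUSY is
 * returned (see the signal_is_blocked() check above). With a NULL callback the default
 * signal_exit_callback() applies, i.e. receipt of the signal exits the loop. */
#if 0
static int example_watch_sigterm(sd_event *e) {
        /* hypothetical example, disabled */
        int r;

        r = sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1);
        if (r < 0)
                return r;

        return sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
}
#endif
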
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once
         * with and once without pidfd. */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_enabled_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

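/* Illustrative usage sketch (example only; my_child_handler is a hypothetical name): watching a
 * forked-off child for exit. As checked above, SIGCHLD must be blocked before the first child
 * source is added, even when a pidfd is used. */
#if 0
static int my_child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* hypothetical example handler; when si->si_code == CLD_EXITED, si->si_status carries
         * the exit status */
        return 0;
}

static int example_watch_child(sd_event *e, pid_t pid) {
        int r;

        r = sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1);
        if (r < 0)
                return r;

        return sd_event_add_child(e, NULL, pid, WEXITED, my_child_handler, NULL);
}
#endif
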
_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_enabled_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

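/* Illustrative note (example only; my_defer_handler is a hypothetical name): a defer source is
 * created SD_EVENT_ONESHOT and marked pending immediately, i.e. it runs once on the next loop
 * iteration. That makes sd_event_add_defer(e, NULL, my_defer_handler, NULL) a convenient way to
 * schedule work for "as soon as we are back in the event loop". */
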
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

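/* Illustrative example (hypothetical masks): two sources on the same inode requesting
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and IN_MOVED_TO respectively yield a combined kernel mask of
 * IN_CLOSE_WRITE|IN_MOVED_TO: the event bits are ORed, but IN_EXCL_UNLINK is dropped because
 * only one of the two sources requested it. */
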
static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

b9350e70
LP
1865static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1866 assert(s);
1867
1868 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1869}
1870
_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = inotify_exit_callback;

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to
         * merge masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations
         * for you, hence callers may not use that flag themselves. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                  (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                  (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the
         * priority of the event source until then; for that we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}
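
/* Illustrative usage sketch for the function above (caller-side code, not part of this file; the callback
 * name and the watched path are made up for the example):
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify mask %x on %s", ev->mask, ev->len > 0 ? ev->name : "(watched inode)");
 *             return 0;
 *     }
 *
 *     static int watch_directory(sd_event *e) {
 *             // Passing NULL for ret makes the source "floating"; watch coalescing means a second
 *             // source on the same inode reuses the kernel watch established here.
 *             return sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE|IN_DELETE, on_inotify, NULL);
 *     }
 */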

static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but we still retain a valid event source object
         * after the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* Edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the
                 * priority anymore. Note that we close any fds when entering the next event loop iteration,
                 * i.e. for inotify events we allow priority changes only until the first following
                 * iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old priority to the signalfd of the new
                 * priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}

static int event_source_disable(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->enabled != SD_EVENT_OFF);

        /* Unset the pending flag when this event source is disabled */
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        s->enabled = SD_EVENT_OFF;

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_SIGNAL:
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                break;

        case SOURCE_CHILD:
                assert(s->event->n_enabled_child_sources > 0);
                s->event->n_enabled_child_sources--;

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        return 0;
}

static int event_source_enable(sd_event_source *s, int enable) {
        int r;

        assert(s);
        assert(IN_SET(enable, SD_EVENT_ON, SD_EVENT_ONESHOT));
        assert(s->enabled == SD_EVENT_OFF);

        /* Unset the pending flag when this event source is enabled */
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        switch (s->type) {
        case SOURCE_IO:
                r = source_io_register(s, enable, s->io.events);
                if (r < 0)
                        return r;
                break;

        case SOURCE_SIGNAL:
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
                if (r < 0) {
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        return r;
                }

                break;

        case SOURCE_CHILD:
                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we have a pidfd */

                        r = source_child_pidfd_register(s, enable);
                        if (r < 0)
                                return r;
                } else {
                        /* no pidfd, or something other to watch for than WEXITED */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                }

                s->event->n_enabled_child_sources++;

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_EXIT:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->enabled = enable;

        /* Non-failing operations below */
        switch (s->type) {
        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                break;
        }

        return 0;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */
                return 0;

        if (m == SD_EVENT_OFF)
                r = event_source_disable(s);
        else {
                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a
                         * shortcut, the event source is already enabled after all. */
                        s->enabled = m;
                        return 0;
                }

                r = event_source_enable(s, m);
        }
        if (r < 0)
                return r;

        event_source_pp_prioq_reshuffle(s);
        return 0;
}
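
/* Usage sketch for the API above (assumed caller-side code): pausing a source and re-arming it for exactly
 * one more dispatch, after which it falls back to SD_EVENT_OFF automatically:
 *
 *     r = sd_event_source_set_enabled(s, SD_EVENT_OFF);      // stop dispatching, source object stays valid
 *     ...
 *     r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);  // dispatch once, then auto-disable
 */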

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, t + usec);
}
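
/* Usage sketch (assumed caller-side code): for a CLOCK_MONOTONIC timer source, the relative helper above is
 * shorthand for querying the loop's cached time and setting an absolute expiry, i.e. these two forms re-arm
 * the source identically:
 *
 *     uint64_t t;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &t);
 *     if (r >= 0)
 *             r = sd_event_source_set_time(s, t + 5 * USEC_PER_SEC);
 *
 *     // ...is equivalent to:
 *     r = sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
 */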

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}

_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we have already seen an indication that the process exited, refuse sending a signal early.
         * This way we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are
         * not available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's propagate the error only if the system call is not implemented or prohibited */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * this here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if a siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}

_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We have two goals here:

          a) We want to wake up as seldom as possible, hence prefer later times over earlier times.

          b) But if we have to wake up, then let's make sure to dispatch as much as possible on the entire
             system.

          We implement this by waking up everywhere at the same time within any given minute if we can,
          synchronised via the perturbation value determined from the boot ID. If we can't, then we try to
          find the same spot in every 10s, then 1s and then 250ms step. Otherwise, we pick the last possible
          time to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
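
/* Worked example with illustrative numbers: say e->perturb is 7.5s and the permissible window is
 * a=100s .. b=161s on the relevant clock. The minute-granularity attempt yields
 * c = (161s / 60s) * 60s + 7.5s = 127.5s (integer division), which is below b and not less than a, so
 * 127.5s is returned: every event loop on this machine whose window spans that minute boundary wakes at
 * second 7.5 of the minute, letting the CPU sleep in between. Had c fallen outside [a, b], the 10s, 1s and
 * 250ms granularities would be tried in turn, and if none fits, the latest admissible time b is returned. */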

static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, so instead aim for some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the new revents, otherwise we reset the
         * value. The ORing is necessary to handle EPOLLONESHOT events properly where readability might
         * happen independently of writability, and we need to keep track of both. */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                event_source_time_prioq_reshuffle(s);
        }

        return 0;
}

static int process_child(sd_event *e) {
        sd_event_source *s;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID + WNOHANG for each PID we wait for,
          instead of using P_ALL. This is because we only want to get child information of very specific
          child processes, and not all of them. We might not have processed the SIGCHLD event of a previous
          invocation and we don't want to maintain an unbounded *per-child* event queue, hence we really
          don't want anything flushed out of the kernel's queue that we don't care about. Since this is O(n)
          this means that if you have a lot of processes you probably want to handle SIGCHLD yourself.

          We do not reap the children here (by using WNOWAIT); this is only done after the event source is
          dispatched so that the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? Then don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
                if (waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's immediately remove the state change
                                 * from the queue, since there's no benefit in leaving it queued. */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
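
/* Semantics sketch of the waitid() call above (plain POSIX, shown standalone): WNOHANG makes the call
 * non-blocking, and WNOWAIT leaves the child in waitable state, so it remains visible as a zombie for the
 * callback and can still be reaped afterwards:
 *
 *     siginfo_t si = {};
 *     if (waitid(P_PID, pid, &si, WEXITED|WNOHANG|WNOWAIT) >= 0 && si.si_pid != 0) {
 *             // The child exited, but has *not* been reaped; a second waitid()/waitpid()
 *             // without WNOWAIT (as done after dispatching) collects it for good.
 *     }
 */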

static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (s->enabled == SD_EVENT_OFF)
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is on this priority too, then make sure
         * to recheck the children we watch. This is because we only ever dequeue the first signal per
         * priority, and if we dequeue one, then SIGCHLD might be enqueued later and we wouldn't know, but
         * we might have higher-priority children we care about, hence we need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}

static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this
                         * inotify object */

                        HASHMAP_FOREACH(inode_data, d->inodes) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also
                         * remove it from our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all
                         * event sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}

static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = sd_event_ref(s->event);

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all post sources as pending */

                SET_FOREACH(z, s->event->post_sources) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from
                 * the buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
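
/* Usage sketch (assumed caller-side code): when the service manager supplies WATCHDOG_USEC= in the
 * environment, one call suffices and the loop then emits the sd_notify("WATCHDOG=1") pings above on its
 * own, at intervals derived from sleep_between():
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;
 *     // Roughly: r > 0 means watchdog support is armed, r == 0 means the manager requested none.
 */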

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise
         * pin filesystems. But we can't close them right away as we need them as long as the user still
         * wants to make adjustments to the event source, such as changing the priority (which requires us
         * to remove and re-add a watch for the inode). Hence, let's close them when entering the first
         * iteration after they were added, as a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and we thus want to minimize
         * syscalls */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        size_t event_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        event_queue_max = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
                return -ENOMEM;

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached("Unexpected event source type");
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
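
/* The prepare/wait/dispatch triple above composes into a hand-rolled loop; this sketch (assumed caller-side
 * code) mirrors what sd_event_run() does internally, minus profiling:
 *
 *     while (sd_event_get_state(e) != SD_EVENT_FINISHED) {
 *             r = sd_event_prepare(e);
 *             if (r == 0)                            // nothing pending yet, so wait
 *                     r = sd_event_wait(e, (uint64_t) -1);
 *             if (r < 0)
 *                     return r;
 *             if (r > 0) {                           // something is pending, dispatch it
 *                     r = sd_event_dispatch(e);
 *                     if (r < 0)
 *                             return r;
 *             }
 *     }
 */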

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

f7262a9f 3769_public_ int sd_event_loop(sd_event *e) {
30dd293c 3770 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
fd38203a
LP
3771 int r;
3772
da7e457c 3773 assert_return(e, -EINVAL);
b937d761 3774 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 3775 assert_return(!event_pid_changed(e), -ECHILD);
2b0c9ef7 3776 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3777
30dd293c 3778 ref = sd_event_ref(e);
fd38203a 3779
da7e457c 3780 while (e->state != SD_EVENT_FINISHED) {
fd38203a
LP
3781 r = sd_event_run(e, (uint64_t) -1);
3782 if (r < 0)
30dd293c 3783 return r;
fd38203a
LP
3784 }
3785
30dd293c 3786 return e->exit_code;
fd38203a
LP
3787}
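
/* Illustrative sketch (not from the original source): a minimal consumer sets
 * up a default loop, hooks up SIGTERM and runs until sd_event_exit() is
 * called. The signal must be blocked before sd_event_add_signal() can take it
 * over via signalfd:
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             sigset_t mask;
 *             int r;
 *
 *             sigemptyset(&mask);
 *             sigaddset(&mask, SIGTERM);
 *             sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *             if (sd_event_default(&e) < 0)
 *                     return 1;
 *
 *             if (sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL) < 0)
 *                     return 1;
 *
 *             r = sd_event_loop(e);
 *             sd_event_unref(e);
 *             return r < 0;
 *     }
 */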

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
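
/* Illustrative sketch (not from the original source, assuming <poll.h>): the
 * fd returned above becomes readable whenever the loop needs servicing, so it
 * can be plugged into an outer poll()/epoll loop:
 *
 *     struct pollfd pfd = {
 *             .fd = sd_event_get_fd(e),
 *             .events = POLLIN,
 *     };
 *
 *     while (sd_event_get_state(e) != SD_EVENT_FINISHED) {
 *             if (poll(&pfd, 1, -1) < 0)
 *                     return -errno;
 *
 *             r = sd_event_run(e, 0);
 *             if (r < 0)
 *                     return r;
 *     }
 *
 * The zero timeout makes sd_event_run() dispatch whatever is pending without
 * blocking a second time. */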

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
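
/* Illustrative sketch (not from the original source): sd_event_exit() is the
 * usual way to leave sd_event_loop() from within a callback; any registered
 * SOURCE_EXIT sources still get a chance to run before the loop finishes:
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & (EPOLLHUP|EPOLLERR))
 *                     return sd_event_exit(sd_event_source_get_event(s), 0);
 *
 *             return handle_io(fd);
 *     }
 *
 * handle_io() is a hypothetical caller-supplied function. */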

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * and for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
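
/* Illustrative sketch (not from the original source): because sd_event_now()
 * returns the timestamp cached when the current iteration woke up, it is the
 * natural base for scheduling relative timers without drift:
 *
 *     uint64_t usec;
 *
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           usec + 5 * USEC_PER_SEC, 0,
 *                           on_timer, NULL);
 *
 * on_timer is a hypothetical sd_event_time_handler_t callback. */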

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
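
/* The default event object is stored in a thread-local variable, so each
 * thread that calls sd_event_default() gets a loop of its own; the owning
 * thread's ID is recorded above so that sd_event_get_tid() below can report
 * it. */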

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
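
/* Illustrative sketch (not from the original source): under a service unit
 * with WatchdogSec= set, a single call wires the loop up to the service
 * manager's watchdog; sd_watchdog_enabled() reads $WATCHDOG_USEC, and the
 * loop then sends the periodic keep-alive pings on its own:
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;
 *     if (r == 0)
 *             log_debug("Watchdog not requested by service manager.");
 *
 * A zero return means no watchdog was configured for this service, which is
 * not an error. */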

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
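
/* Illustrative sketch (not from the original source): a destroy callback ties
 * the lifetime of heap-allocated userdata to the source, so no separate
 * bookkeeping is needed when the source goes away:
 *
 *     struct ctx *c = new0(struct ctx, 1);
 *     if (!c)
 *             return -ENOMEM;
 *
 *     r = sd_event_add_defer(e, &s, on_defer, c);
 *     if (r < 0) {
 *             free(c);
 *             return r;
 *     }
 *
 *     (void) sd_event_source_set_destroy_callback(s, free);
 *
 * struct ctx and on_defer are hypothetical; since sd_event_destroy_t takes
 * just the userdata pointer, plain free() can serve as the callback. */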

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
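
/* Illustrative sketch (not from the original source): with a floating source
 * the reference between source and event loop is inverted, i.e. the loop
 * keeps the source alive rather than the other way around, which is handy for
 * fire-and-forget sources that should live exactly as long as the loop does:
 *
 *     sd_event_source *s;
 *
 *     r = sd_event_add_defer(e, &s, on_defer, NULL);
 *     if (r < 0)
 *             return r;
 *
 *     (void) sd_event_source_set_floating(s, true);
 *     sd_event_source_unref(s);
 *
 * on_defer is a hypothetical handler. */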

_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}
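
/* Illustrative sketch (not from the original source): with exit-on-failure
 * set, an error returned from the source's handler takes the whole loop down
 * instead of merely disabling that one source:
 *
 *     r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, NULL);
 *     if (r < 0)
 *             return r;
 *
 *     (void) sd_event_source_set_exit_on_failure(s, true);
 *
 * on_io is a hypothetical handler; a negative return value from it would then
 * be propagated as the loop's exit code. */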