/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;
        size_t event_queue_allocated;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

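/* Editorial note: a minimal usage sketch of the allocation API above (illustrative
 * only, not part of the original file; compiled out via #if 0). */
#if 0
static int example_loop(void) {
        sd_event *e = NULL;
        int r;

        r = sd_event_new(&e);           /* allocates a loop with n_ref = 1 */
        if (r < 0)
                return r;

        r = sd_event_loop(e);           /* dispatches until sd_event_exit() is called */

        sd_event_unref(e);              /* releases our reference again */
        return r;
}
#endif
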
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

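/* Editorial note: an illustrative sketch (not part of the original file) of why the
 * combined disable+unref above is useful: if other code still holds a reference, a
 * plain unref would leave the callback armed, while this call also silences it. */
#if 0
static void stop_watching(sd_event_source **s) {
        /* Safe on NULL; disables the source before dropping our reference */
        *s = sd_event_source_disable_unref(*s);
}
#endif
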
static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, the object is removed altogether. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);
        assert(EVENT_SOURCE_IS_TIME(s->type));

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state. Makes sure the two prioq's are ordered properly again. */
        assert_se(d = event_get_clock_data(s->event, s->type));
        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

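/* Editorial usage sketch for sd_event_add_io() (illustrative only, not part of the
 * original file; "on_socket_ready" is a hypothetical handler). */
#if 0
static int on_socket_ready(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        /* Invoked whenever "fd" is readable; returning < 0 aborts the loop */
        return 0;
}

static int watch_fd(sd_event *e, int fd) {
        /* Passing NULL for ret makes the source "floating", i.e. owned by the loop */
        return sd_event_add_io(e, NULL, fd, EPOLLIN, on_socket_ready, NULL);
}
#endif
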
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to the same time within each
         * minute/second/250ms, so that events all across the system can be coalesced into a single CPU
         * wakeup. However, let's take some system-specific randomness for this value, so that in a
         * network of systems with synced clocks timer events are distributed a bit. Here, we calculate a
         * perturbation usec offset from the boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

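/* Editorial usage sketch for the timer calls above (illustrative only, not part of
 * the original file): a oneshot timer one second from now, with the default
 * accuracy selected by passing 0. */
#if 0
static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int arm_timer(sd_event *e) {
        return sd_event_add_time_relative(e, NULL, CLOCK_MONOTONIC,
                                          USEC_PER_SEC, 0, on_timer, NULL);
}
#endif
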
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

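/* Editorial usage sketch for sd_event_add_signal() (illustrative only, not part of
 * the original file): the signal must already be blocked, otherwise -EBUSY is
 * returned per the signal_is_blocked() check above. A NULL callback falls back to
 * signal_exit_callback(), i.e. exits the loop with PTR_TO_INT(userdata). */
#if 0
static int setup_sigterm(sd_event *e) {
        assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
        return sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
}
#endif
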
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_enabled_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_enabled_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

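/* Editorial usage sketch for the child-watching calls above (illustrative only, not
 * part of the original file): SIGCHLD must be blocked first, mirroring the checks
 * in sd_event_add_child()/sd_event_add_child_pidfd(). */
#if 0
static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* si->si_pid/si->si_status describe the terminated child */
        return 0;
}

static int watch_child(sd_event *e, pid_t pid) {
        assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
        return sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
}
#endif
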
static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

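/* Editorial usage sketch for sd_event_add_defer() (illustrative only, not part of
 * the original file): the source is created oneshot and already pending, so the
 * callback runs on the next loop iteration. */
#if 0
static int run_soon(sd_event_source *s, void *userdata) {
        /* Executed once, on the next iteration */
        return 0;
}

static int queue_job(sd_event *e) {
        return sd_event_add_defer(e, NULL, run_soon, NULL);
}
#endif
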
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

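/* Editorial note (illustrative only, not part of the original file): unlike defer
 * sources, post sources stay enabled (SD_EVENT_ON) and are dispatched after other,
 * non-post event sources have fired in an iteration. */
#if 0
static int on_post(sd_event_source *s, void *userdata) {
        /* Runs at the end of an iteration in which some other source fired */
        return 0;
}

static int add_post_hook(sd_event *e) {
        return sd_event_add_post(e, NULL, on_post, NULL);
}
#endif
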
_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

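/* Editorial usage sketch for sd_event_add_exit() (illustrative only, not part of
 * the original file): exit sources run once sd_event_exit() has been called, in
 * priority order, which makes them handy for cleanup work. */
#if 0
static int on_exit_cleanup(sd_event_source *s, void *userdata) {
        /* Runs during loop shutdown, after sd_event_exit() */
        return 0;
}

static int add_cleanup(sd_event *e) {
        return sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
}
#endif
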
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC); /* was O_CLOEXEC; the inotify flag is IN_CLOEXEC */
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued at the very moment we
                                 * enter the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a
                                 * very likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or
         * oneshot. That's because we cannot change the mask anymore after the event source was created once, since
         * the kernel has no API for that. Hence we need to subscribe to the maximum mask we ever might be interested
         * in, and suppress events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd; /* inotify_add_watch_fd() returns a negative errno itself; "return -errno" here
                            * would propagate stale state */

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd); /* was d->fd, but that is the O_PATH inode fd,
                                                                   * not the inotify fd */
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

b9350e70
LP
1853static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1854 assert(s);
1855
1856 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1857}
1858
97ef5391
LP
1859_public_ int sd_event_add_inotify(
1860 sd_event *e,
1861 sd_event_source **ret,
1862 const char *path,
1863 uint32_t mask,
1864 sd_event_inotify_handler_t callback,
1865 void *userdata) {
1866
97ef5391
LP
1867 struct inotify_data *inotify_data = NULL;
1868 struct inode_data *inode_data = NULL;
1869 _cleanup_close_ int fd = -1;
8c75fe17 1870 _cleanup_(source_freep) sd_event_source *s = NULL;
97ef5391
LP
1871 struct stat st;
1872 int r;
1873
1874 assert_return(e, -EINVAL);
1875 assert_return(e = event_resolve(e), -ENOPKG);
1876 assert_return(path, -EINVAL);
97ef5391
LP
1877 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1878 assert_return(!event_pid_changed(e), -ECHILD);
1879
b9350e70
LP
1880 if (!callback)
1881 callback = inotify_exit_callback;
1882
97ef5391
LP
1883 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1884 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1885 * the user can't use them for us. */
1886 if (mask & IN_MASK_ADD)
1887 return -EINVAL;
1888
1889 fd = open(path, O_PATH|O_CLOEXEC|
1890 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1891 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1892 if (fd < 0)
1893 return -errno;
1894
1895 if (fstat(fd, &st) < 0)
1896 return -errno;
1897
1898 s = source_new(e, !ret, SOURCE_INOTIFY);
1899 if (!s)
1900 return -ENOMEM;
1901
1902 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1903 s->inotify.mask = mask;
1904 s->inotify.callback = callback;
1905 s->userdata = userdata;
1906
1907 /* Allocate an inotify object for this priority, and an inode object within it */
1908 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1909 if (r < 0)
8c75fe17 1910 return r;
97ef5391
LP
1911
1912 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
8c75fe17
ZJS
1913 if (r < 0) {
1914 event_free_inotify_data(e, inotify_data);
1915 return r;
1916 }
97ef5391
LP
1917
1918 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1919 * the event source, until then, for which we need the original inode. */
1920 if (inode_data->fd < 0) {
1921 inode_data->fd = TAKE_FD(fd);
1922 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1923 }
1924
1925 /* Link our event source to the inode data object */
1926 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1927 s->inotify.inode_data = inode_data;
1928
97ef5391
LP
1929 /* Actually realize the watch now */
1930 r = inode_data_realize_watch(e, inode_data);
1931 if (r < 0)
8c75fe17 1932 return r;
97ef5391
LP
1933
1934 (void) sd_event_source_set_description(s, path);
1935
1936 if (ret)
1937 *ret = s;
8c75fe17 1938 TAKE_PTR(s);
97ef5391
LP
1939
1940 return 0;
97ef5391
LP
1941}
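
/* Usage sketch (illustrative only, not part of the library; the path and handler below are made up for the
 * example). A floating inotify source that prints events for a directory:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             printf("mask=%" PRIx32 " name=%s\n", ev->mask, ev->len > 0 ? ev->name : "(inode itself)");
 *             return 0;
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             if (sd_event_default(&e) < 0)
 *                     return 1;
 *             if (sd_event_add_inotify(e, NULL, "/tmp/watched", IN_CLOSE_WRITE|IN_MOVED_TO, on_inotify, NULL) < 0)
 *                     return 1;
 *
 *             (void) sd_event_loop(e);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 *
 * Passing NULL for ret makes the source "floating", i.e. owned by the event loop itself. */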

static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}
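
/* Ownership sketch (illustrative): by default the caller keeps ownership of the fd passed to
 * sd_event_add_io() and must close it itself; with the flag below the event source closes the fd when the
 * source is freed. The on_io handler is a made-up name:
 *
 *     int fd = eventfd(0, EFD_CLOEXEC|EFD_NONBLOCK);
 *     if (fd >= 0 &&
 *         sd_event_add_io(e, &s, fd, EPOLLIN, on_io, NULL) >= 0)
 *             (void) sd_event_source_set_io_fd_own(s, true);
 *
 * After this, freeing the source also closes fd. */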

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}
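
/* Example (illustrative): because updates with EPOLLET set are never short-circuited above, calling
 *
 *     (void) sd_event_source_set_io_events(s, EPOLLIN|EPOLLET);
 *
 * with an unchanged mask still re-registers the fd via EPOLL_CTL_MOD, which per epoll semantics re-arms the
 * edge-trigger state; that can be useful after draining a socket without reading until EAGAIN. */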

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

                /* We need the original fd to change the priority. If we don't have it we can't change the
                 * priority anymore. Note that we close any fds when entering the next event loop iteration,
                 * i.e. for inotify events we allow priority changes only until the first following iteration. */
                if (old_inode_data->fd < 0)
                        return -EOPNOTSUPP;

                r = event_make_inotify_data(s->event, priority, &new_inotify_data);
                if (r < 0)
                        return r;
                rm_inotify = r > 0;

                r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
                if (r < 0)
                        goto fail;
                rm_inode = r > 0;

                if (new_inode_data->fd < 0) {
                        /* Duplicate the fd for the new inode object if we don't have any yet */
                        new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
                        if (new_inode_data->fd < 0) {
                                r = -errno;
                                goto fail;
                        }

                        LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
                }

                /* Move the event source to the new inode data structure */
                LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
                LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
                s->inotify.inode_data = new_inode_data;

                /* Now create the new watch */
                r = inode_data_realize_watch(s->event, new_inode_data);
                if (r < 0) {
                        /* Move it back */
                        LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
                        LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
                        s->inotify.inode_data = old_inode_data;
                        goto fail;
                }

                s->priority = priority;

                event_gc_inode_data(s->event, old_inode_data);

        } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        event_source_pp_prioq_reshuffle(s);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;

fail:
        if (rm_inode)
                event_free_inode_data(s->event, new_inode_data);

        if (rm_inotify)
                event_free_inotify_data(s->event, new_inotify_data);

        return r;
}
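
/* Usage sketch (illustrative): lower the numeric priority so the source is dispatched before
 * default-priority sources in the same iteration. For SOURCE_INOTIFY this only works until the first event
 * loop iteration after creation, as explained above:
 *
 *     r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
 *     if (r == -EOPNOTSUPP)
 *             log_debug("Too late to change the priority of this inotify source.");
 */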

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}

static int event_source_disable(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->enabled != SD_EVENT_OFF);

        /* Unset the pending flag when this event source is disabled */
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        s->enabled = SD_EVENT_OFF;

        switch (s->type) {

        case SOURCE_IO:
                source_io_unregister(s);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_SIGNAL:
                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                break;

        case SOURCE_CHILD:
                assert(s->event->n_enabled_child_sources > 0);
                s->event->n_enabled_child_sources--;

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        return 0;
}

static int event_source_enable(sd_event_source *s, int enable) {
        int r;

        assert(s);
        assert(IN_SET(enable, SD_EVENT_ON, SD_EVENT_ONESHOT));
        assert(s->enabled == SD_EVENT_OFF);

        /* Unset the pending flag when this event source is enabled */
        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        switch (s->type) {
        case SOURCE_IO:
                r = source_io_register(s, enable, s->io.events);
                if (r < 0)
                        return r;
                break;

        case SOURCE_SIGNAL:
                r = event_make_signal_data(s->event, s->signal.sig, NULL);
                if (r < 0) {
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        return r;
                }

                break;

        case SOURCE_CHILD:
                if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                        /* yes, we have pidfd */

                        r = source_child_pidfd_register(s, enable);
                        if (r < 0)
                                return r;
                } else {
                        /* no pidfd, or something other than WEXITED to watch for */

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }
                }

                s->event->n_enabled_child_sources++;

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
        case SOURCE_EXIT:
        case SOURCE_DEFER:
        case SOURCE_POST:
        case SOURCE_INOTIFY:
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->enabled = enable;

        /* Non-failing operations below */
        switch (s->type) {
        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                event_source_time_prioq_reshuffle(s);
                break;

        case SOURCE_EXIT:
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                break;
        }

        return 0;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m) /* No change? */
                return 0;

        if (m == SD_EVENT_OFF)
                r = event_source_disable(s);
        else {
                if (s->enabled != SD_EVENT_OFF) {
                        /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
                         * event source is already enabled after all. */
                        s->enabled = m;
                        return 0;
                }

                r = event_source_enable(s, m);
        }
        if (r < 0)
                return r;

        event_source_pp_prioq_reshuffle(s);
        return 0;
}
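
/* State sketch (illustrative): an SD_EVENT_ONESHOT source is switched to SD_EVENT_OFF just before its
 * handler runs, so the handler may re-arm it explicitly for one more hit. handle_event() below is a
 * stand-in for whatever work the caller does:
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             handle_event(fd, revents);
 *             return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *     }
 */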

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
        usec_t t;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);

        r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_source_set_time(s, t + usec);
}
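
/* Usage sketch (illustrative): a one-shot timer firing roughly five seconds from now, with default
 * accuracy, using the relative helper above:
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     r = sd_event_add_time(e, &s, CLOCK_MONOTONIC, UINT64_MAX, 0, on_time, NULL);
 *     if (r >= 0)
 *             r = sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
 */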

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        event_source_time_prioq_reshuffle(s);
        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd;
}

_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);
        assert_return(SIGNAL_VALID(sig), -EINVAL);

        /* If we have already seen an indication that the process exited, refuse to send a signal. This way
         * we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
         * available. */
        if (s->child.exited)
                return -ESRCH;

        if (s->child.pidfd >= 0) {
                siginfo_t copy;

                /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
                 * structure here */
                if (si)
                        copy = *si;

                if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
                        /* Let's propagate the error only if the system call is not implemented or prohibited */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        return 0;
        }

        /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
         * this here. */
        if (flags != 0)
                return -EOPNOTSUPP;

        if (si) {
                /* We use rt_sigqueueinfo() only if a siginfo_t is specified. */
                siginfo_t copy = *si;

                if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
                        return -errno;
        } else if (kill(s->child.pid, sig) < 0)
                return -errno;

        return 0;
}
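
/* Usage sketch (illustrative): ask a watched child to terminate; the call transparently prefers
 * pidfd_send_signal() and falls back to kill(), as implemented above:
 *
 *     r = sd_event_source_send_child_signal(s, SIGTERM, NULL, 0);
 *     if (r == -ESRCH)
 *             log_debug("Child already exited, nothing to kill.");
 */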

_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        return s->child.pidfd_owned;
}

_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        if (s->child.pidfd < 0)
                return -EOPNOTSUPP;

        s->child.pidfd_owned = own;
        return 0;
}

_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        return s->child.process_owned;
}

_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);

        s->child.process_owned = own;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;

        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
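
/* Worked example (hypothetical numbers): assume the per-boot perturbation is 15s and the caller passes the
 * window a=13:00:02, b=13:00:20 (as absolute times). The first candidate is
 * c = (b / 1min) * 1min + perturb = 13:00:15; since a <= c < b we wake at 13:00:15, and so does every other
 * event loop on this machine whose window covers that spot, because they all derive the same perturbation
 * from the boot ID. Only when the minute grid misses [a, b] do the 10s, 1s and 250ms grids get tried, and
 * finally b itself is returned. */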

static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, so instead we set the timer to some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                event_source_time_prioq_reshuffle(s);
        }

        return 0;
}

static int process_child(sd_event *e) {
        sd_event_source *s;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                if (s->child.exited)
                        continue;

                if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
                        continue;

                zero(s->child.siginfo);
                if (waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (zombie)
                                s->child.exited = true;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}

static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (s->pending)
                return 0;

        if (s->enabled == SD_EVENT_OFF)
                return 0;

        if (!EVENT_SOURCE_WATCH_PIDFD(s))
                return 0;

        zero(s->child.siginfo);
        if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
                return -errno;

        if (s->child.siginfo.si_pid == 0)
                return 0;

        if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
                s->child.exited = true;

        return source_set_pending(s, true);
}

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one and a
           SIGCHLD is enqueued behind it we wouldn't notice; but we
           might have higher-priority children we care about, hence
           we need to check for them explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}

static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}

static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

static int source_dispatch(sd_event_source *s) {
        _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback, which might
         * invalidate the event. */
        saved_type = s->type;

        /* Similarly, store a reference to the event loop object, so that we can still access it after the
         * callback might have invalidated/disconnected the event source. */
        saved_event = sd_event_ref(s->event);

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie) {
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
                        s->child.waited = true;
                }

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
                                strna(s->description),
                                event_source_type_to_string(saved_type),
                                s->exit_on_failure ? "exiting" : "disabling");

                if (s->exit_on_failure)
                        (void) sd_event_exit(saved_event, r);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
                                        strna(s->description),
                                        event_source_type_to_string(s->type),
                                        s->exit_on_failure ? "exiting" : "disabling");

                        if (s->exit_on_failure)
                                (void) sd_event_exit(e, r);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}
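
/* Worked example (hypothetical period): with a watchdog period of 30s, process_watchdog() pings the service
 * manager at most every 7.5s (period/4), and arm_watchdog() schedules the next forced wakeup between 15s
 * and 22.5s after the last ping (period/2 to period*3/4), letting sleep_between() pick the exact spot so
 * that multiple event loops wake up in sync. */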

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise
         * pin filesystems. But we can't close them right away as we need them as long as the user still
         * wants to make adjustments to the event source, such as changing the priority (which requires us to
         * remove and re-add a watch for the inode). Hence, let's close them when entering the first
         * iteration after they were added, as a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
         * this check here once, since gettid() is typically not cached, and thus want to minimize
         * syscalls */
        assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        size_t event_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        event_queue_max = MAX(e->n_sources, 1u);
        if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
                return -ENOMEM;

        /* If we still have inotify data buffered, then query the other fds, but don't wait on them */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
                else {
                        WakeupType *t = e->event_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE: {
                                sd_event_source *s = e->event_queue[i].data.ptr;

                                assert(s);

                                switch (s->type) {

                                case SOURCE_IO:
                                        r = process_io(e, s, e->event_queue[i].events);
                                        break;

                                case SOURCE_CHILD:
                                        r = process_pidfd(e, s, e->event_queue[i].events);
                                        break;

                                default:
                                        assert_not_reached("Unexpected event source type");
                                }

                                break;
                        }

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = e->event_queue[i].data.ptr;

                                assert(d);

                                r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}
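
/* Integration sketch (illustrative): the prepare/wait/dispatch split exists so the loop can be driven
 * step by step, e.g. from a foreign event loop. One hand-rolled iteration, roughly equivalent to
 * sd_event_run(), might look like this:
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)
 *             r = sd_event_wait(e, UINT64_MAX);
 *     if (r > 0)
 *             r = sd_event_dispatch(e);
 *
 * A foreign loop would instead poll the fd returned by sd_event_get_fd() and call sd_event_wait(e, 0) once
 * that fd turns readable. */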

static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
        size_t l, i;

        p = b;
        l = sizeof(b);
        for (i = 0; i < ELEMENTSOF(e->delays); i++) {
                l = strpcpyf(&p, l, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %s", b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < ELEMENTSOF(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}
3756
f7262a9f 3757_public_ int sd_event_loop(sd_event *e) {
30dd293c 3758 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
fd38203a
LP
3759 int r;
3760
da7e457c 3761 assert_return(e, -EINVAL);
b937d761 3762 assert_return(e = event_resolve(e), -ENOPKG);
da7e457c 3763 assert_return(!event_pid_changed(e), -ECHILD);
2b0c9ef7 3764 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3765
30dd293c 3766 ref = sd_event_ref(e);
fd38203a 3767
da7e457c 3768 while (e->state != SD_EVENT_FINISHED) {
fd38203a
LP
3769 r = sd_event_run(e, (uint64_t) -1);
3770 if (r < 0)
30dd293c 3771 return r;
fd38203a
LP
3772 }
3773
30dd293c 3774 return e->exit_code;
fd38203a
LP
3775}
3776
_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

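/* Illustrative sketch (not part of sd-event.c): embedding the loop in a
 * foreign poll()-based loop via its epoll fd, per the pattern documented in
 * sd_event_get_fd(3). Assumes <poll.h>; the function name is hypothetical. */
static int foreign_loop_step(sd_event *e) {
        int r;

        r = sd_event_prepare(e);
        if (r < 0)
                return r;

        if (r == 0) {
                struct pollfd pfd = {
                        .fd = sd_event_get_fd(e),
                        .events = POLLIN,
                };

                /* Nothing pending yet: wait on the loop's fd ourselves... */
                if (poll(&pfd, 1, -1) < 0)
                        return -errno;

                /* ...then let sd-event collect what became ready, without blocking. */
                r = sd_event_wait(e, 0);
                if (r <= 0)
                        return r;
        }

        return sd_event_dispatch(e);
}
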
_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but
         * CLOCK_BOOTTIME_ALARM is not, and for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

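/* Illustrative sketch (not part of sd-event.c): the main consumer of
 * sd_event_now() is relative timer scheduling; using the cached timestamp
 * keeps all timers armed in the same iteration in sync. The function name is
 * hypothetical. */
static int arm_timer_in_five_seconds(sd_event *e, sd_event_time_handler_t handler, void *userdata) {
        uint64_t usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &usec); /* 0: cached time, 1: fell back to now() */
        if (r < 0)
                return r;

        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 usec + 5 * USEC_PER_SEC, 0, handler, userdata);
}
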
_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

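/* Illustrative sketch (not part of sd-event.c): opting in to watchdog
 * support from a service. With WatchdogSec= set in the unit file, the loop
 * then sends "WATCHDOG=1" keep-alive pings by itself as long as it keeps
 * iterating, so a wedged loop is noticed by the service manager. The
 * function name is hypothetical. */
static int maybe_enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;
        if (r == 0)
                log_debug("Watchdog not requested via $WATCHDOG_USEC, continuing without.");

        return r;
}
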
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

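/* Illustrative sketch (not part of sd-event.c): tying heap-allocated
 * userdata to a source's lifetime via the destroy callback. "struct
 * my_context" and the function name are hypothetical; assumes <stdlib.h>
 * for free(). */
struct my_context {
        unsigned n_events; /* hypothetical per-source state */
};

static int add_io_with_context(sd_event *e, int fd, sd_event_io_handler_t handler) {
        sd_event_source *s;
        struct my_context *c;
        int r;

        c = new0(struct my_context, 1);
        if (!c)
                return -ENOMEM;

        r = sd_event_add_io(e, &s, fd, EPOLLIN, handler, c);
        if (r < 0) {
                free(c);
                return r;
        }

        /* From here on the source owns c: free() runs when it is destroyed. */
        return sd_event_source_set_destroy_callback(s, free);
}
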
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}

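/* Illustrative sketch (not part of sd-event.c): a "fire and forget" source.
 * Once floating, the source is pinned by the event loop rather than by the
 * caller and stays alive until the loop itself is freed; passing a NULL
 * return pointer to the sd_event_add_*() constructors sets this up
 * implicitly. The function name is hypothetical. */
static int add_floating_defer(sd_event *e, sd_event_handler_t handler) {
        sd_event_source *s;
        int r;

        r = sd_event_add_defer(e, &s, handler, NULL);
        if (r < 0)
                return r;

        r = sd_event_source_set_floating(s, true);
        if (r < 0)
                return r;

        sd_event_source_unref(s); /* drop our reference; the loop's reference keeps it alive */
        return 0;
}
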
_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}