/* SPDX-License-Identifier: LGPL-2.1-or-later */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "env-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing_syscall.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "strxcpyx.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
        /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
        return s &&
                s->type == SOURCE_CHILD &&
                s->child.pidfd >= 0 &&
                s->child.options == WEXITED;
}

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        struct epoll_event *event_queue;
        size_t event_queue_allocated;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run_usec, last_log_usec;
        unsigned delays[sizeof(usec_t) * 8];
};

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

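/* Illustrative note (not part of the original source): each clock keeps *two* priority queues over the
 * same timer sources. "earliest" is ordered by the requested wake-up time (time.next), "latest" by the
 * deadline (time.next + accuracy). When arming the timerfd, the loop may pick any point between the head
 * of "earliest" and the head of "latest", which is what allows wake-ups to be coalesced. A hypothetical
 * example: two sources with next=100ms/accuracy=50ms and next=120ms/accuracy=50ms can both be served by a
 * single wake-up anywhere in the [120ms, 150ms] window. */
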
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        free(e->event_queue);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

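/* Illustrative usage sketch (not part of this file), showing the typical allocate/run/unref cycle:
 *
 *     sd_event *e = NULL;
 *     int r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_loop(e);   // dispatches sources until sd_event_exit() is called
 *     sd_event_unref(e);
 *     return r;
 */
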
_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
        if (s)
                (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
        return sd_event_source_unref(s);
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        struct epoll_event ev = {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (epoll_ctl(s->event->epoll_fd,
                      s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                      s->io.fd, &ev) < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static void source_child_pidfd_unregister(sd_event_source *s) {
        assert(s);
        assert(s->type == SOURCE_CHILD);

        if (event_pid_changed(s->event))
                return;

        if (!s->child.registered)
                return;

        if (EVENT_SOURCE_WATCH_PIDFD(s))
                if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

        s->child.registered = false;
}

static int source_child_pidfd_register(sd_event_source *s, int enabled) {
        assert(s);
        assert(s->type == SOURCE_CHILD);
        assert(enabled != SD_EVENT_OFF);

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                struct epoll_event ev = {
                        .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                        .data.ptr = s,
                };

                if (epoll_ctl(s->event->epoll_fd,
                              s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
                              s->child.pidfd, &ev) < 0)
                        return -errno;
        }

        s->child.registered = true;
        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

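/* Illustrative note (not part of the original source): signal handling is multiplexed through one
 * signalfd per priority level, not one per signal. event_make_signal_data() above therefore looks up the
 * signal_data object by the source's priority, extends its signal mask with sigaddset(), and re-applies
 * the mask with signalfd(). Only when a priority level sees its first signal is a new fd allocated and
 * added to the epoll, keeping the fd count proportional to the number of distinct priorities in use. */
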
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
         * and possibly drop the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
        assert(s);

        /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
         * they are enabled/disabled or marked pending and such. */

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
}

static void event_source_time_prioq_reshuffle(sd_event_source *s) {
        struct clock_data *d;

        assert(s);
        assert(EVENT_SOURCE_IS_TIME(s->type));

        /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
         * pending, enable state. Makes sure the two prioq's are ordered properly again. */
        assert_se(d = event_get_clock_data(s->event, s->type));
        prioq_reshuffle(d->earliest, s, &s->earliest_index);
        prioq_reshuffle(d->latest, s, &s->latest_index);
        d->needs_rearm = true;
}

static void event_source_time_prioq_remove(
                sd_event_source *s,
                struct clock_data *d) {

        assert(s);
        assert(d);

        prioq_remove(d->earliest, s, &s->earliest_index);
        prioq_remove(d->latest, s, &s->latest_index);
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        d->needs_rearm = true;
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;
                assert_se(d = event_get_clock_data(s->event, s->type));
                event_source_time_prioq_remove(s, d);
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                }

                if (EVENT_SOURCE_WATCH_PIDFD(s))
                        source_child_pidfd_unregister(s);
                else
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = TAKE_PTR(s->event);
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
         * pidfd associated with this event source, which we'll do only on source_free(). */

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->type == SOURCE_CHILD) {
                /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */

                if (s->child.process_owned) {

                        if (!s->child.exited) {
                                bool sent = false;

                                if (s->child.pidfd >= 0) {
                                        if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
                                                if (errno == ESRCH) /* Already dead */
                                                        sent = true;
                                                else if (!ERRNO_IS_NOT_SUPPORTED(errno))
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
                                                                        s->child.pid);
                                        } else
                                                sent = true;
                                }

                                if (!sent)
                                        if (kill(s->child.pid, SIGKILL) < 0)
                                                if (errno != ESRCH) /* Already dead */
                                                        log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
                                                                        s->child.pid);
                        }

                        if (!s->child.waited) {
                                siginfo_t si = {};

                                /* Reap the child if we can */
                                (void) waitid(P_PID, s->child.pid, &si, WEXITED);
                        }
                }

                if (s->child.pidfd_owned)
                        s->child.pidfd = safe_close(s->child.pidfd);
        }

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type))
                event_source_time_prioq_reshuffle(s);

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = io_exit_callback;

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

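/* Illustrative usage sketch (not part of this file): watching a socket for readability. `sock_fd` and
 * `on_readable` are hypothetical names:
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));  // level-triggered: called while data is pending
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     sd_event_source *src = NULL;
 *     r = sd_event_add_io(e, &src, sock_fd, EPOLLIN, on_readable, NULL);
 */
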
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

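/* Illustrative example (not part of the original source): suppose bootid.qwords[0] ^ bootid.qwords[1]
 * yields a perturb value of 17.3s. A timer whose accuracy allows it to fire "sometime within each minute"
 * is then aligned to second 17.3 of the minute on this machine, while a machine with a different boot ID
 * aligns elsewhere: local timers coalesce into one wake-up, but a fleet of machines with synced clocks
 * doesn't all wake at the same instant. */
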
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        _cleanup_close_ int fd = -1;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
                return -errno;

        d->fd = TAKE_FD(fd);
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
        int r;

        assert(d);

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        return 0;
}

static int event_source_time_prioq_put(
                sd_event_source *s,
                struct clock_data *d) {

        int r;

        assert(s);
        assert(d);

        r = prioq_put(d->earliest, s, &s->earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->latest_index);
        if (r < 0) {
                assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
                s->earliest_index = PRIOQ_IDX_NULL;
                return r;
        }

        d->needs_rearm = true;
        return 0;
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        assert_se(d = event_get_clock_data(e, type));

        r = setup_clock_data(e, d, clock);
        if (r < 0)
                return r;

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = event_source_time_prioq_put(s, d);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_time_relative(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        usec_t t;
        int r;

        /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
         * checks for overflow. */

        r = sd_event_now(e, clock, &t);
        if (r < 0)
                return r;

        if (usec >= USEC_INFINITY - t)
                return -EOVERFLOW;

        return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
}

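/* Illustrative usage sketch (not part of this file): a one-shot timer 5s from now with 100ms slack; the
 * callback name is hypothetical:
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     r = sd_event_add_time_relative(e, NULL, CLOCK_MONOTONIC,
 *                                    5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                                    on_timer, NULL);
 */
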
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = signal_is_blocked(sig);
        if (r < 0)
                return r;
        if (r == 0)
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

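/* Illustrative usage sketch (not part of this file): since sd_event_add_signal() returns -EBUSY when the
 * signal is not blocked, callers must mask it first, e.g.:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &ss, NULL);                       // or pthread_sigmask() in threaded code
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);   // NULL callback exits the loop
 */
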
static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

static bool shall_use_pidfd(void) {
        /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
        return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
}

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
                 * for compatibility with pre-pidfd and because we don't want to reap the child processes
                 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
                 * take effect.
                 *
                 * (As an optimization we only do this check on the first child event source created.) */
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
         * pin the PID, and make regular waitid() handling race-free. */

        if (shall_use_pidfd()) {
                s->child.pidfd = pidfd_open(s->child.pid, 0);
                if (s->child.pidfd < 0) {
                        /* Propagate errors unless the syscall is not supported or blocked */
                        if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
                                return -errno;
                } else
                        s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
        } else
                s->child.pidfd = -1;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We have a pidfd and we only want to watch for exit */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;

        } else {
                /* We have no pidfd or we shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_enabled_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

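/* Illustrative usage sketch (not part of this file): SIGCHLD must be blocked before the first child
 * source is added, even on pidfd-capable kernels, as checked above. `child_pid` and `on_child` are
 * hypothetical:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("child " PID_FMT " exited with status %i", si->si_pid, si->si_status);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL);
 */
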
_public_ int sd_event_add_child_pidfd(
                sd_event *e,
                sd_event_source **ret,
                int pidfd,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        pid_t pid;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pidfd >= 0, -EBADF);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = child_exit_callback;

        if (e->n_enabled_child_sources == 0) {
                r = signal_is_blocked(SIGCHLD);
                if (r < 0)
                        return r;
                if (r == 0)
                        return -EBUSY;
        }

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        r = pidfd_get_pid(pidfd, &pid);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->child.pidfd = pidfd;
        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        if (EVENT_SOURCE_WATCH_PIDFD(s)) {
                /* We only want to watch for WEXITED */
                r = source_child_pidfd_register(s, s->enabled);
                if (r < 0)
                        return r;
        } else {
                /* We shall wait for some other event than WEXITED */
                r = event_make_signal_data(e, SIGCHLD, NULL);
                if (r < 0)
                        return r;

                e->need_process_child = true;
        }

        e->n_enabled_child_sources++;

        if (ret)
                *ret = s;
        TAKE_PTR(s);
        return 0;
}

static int generic_exit_callback(sd_event_source *s, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = generic_exit_callback;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_ensure_put(&e->post_sources, NULL, s);
        if (r < 0)
                return r;
        assert(r > 0);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        struct epoll_event ev = {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
                                 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
                                 * likely case to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return -errno;

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

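/* Illustrative example (not part of the original source): if one source watches an inode with
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and a second source adds IN_MOVED_TO without IN_EXCL_UNLINK, the combined
 * kernel-side mask becomes IN_CLOSE_WRITE|IN_MOVED_TO: the event bits are ORed, IN_EXCL_UNLINK is dropped
 * because it is ANDed across sources, and per-source flags such as IN_ONESHOT are stripped here and
 * emulated client-side instead. */
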
b9350e70
LP
1890static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1891 assert(s);
1892
1893 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1894}
1895
97ef5391
LP
1896_public_ int sd_event_add_inotify(
1897 sd_event *e,
1898 sd_event_source **ret,
1899 const char *path,
1900 uint32_t mask,
1901 sd_event_inotify_handler_t callback,
1902 void *userdata) {
1903
97ef5391
LP
1904 struct inotify_data *inotify_data = NULL;
1905 struct inode_data *inode_data = NULL;
1906 _cleanup_close_ int fd = -1;
8c75fe17 1907 _cleanup_(source_freep) sd_event_source *s = NULL;
97ef5391
LP
1908 struct stat st;
1909 int r;
1910
1911 assert_return(e, -EINVAL);
1912 assert_return(e = event_resolve(e), -ENOPKG);
1913 assert_return(path, -EINVAL);
97ef5391
LP
1914 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1915 assert_return(!event_pid_changed(e), -ECHILD);
1916
b9350e70
LP
1917 if (!callback)
1918 callback = inotify_exit_callback;
1919
1920 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1921 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1922 * the caller can't pass that flag in directly. */
1923 if (mask & IN_MASK_ADD)
1924 return -EINVAL;
1925
1926 fd = open(path, O_PATH|O_CLOEXEC|
1927 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1928 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1929 if (fd < 0)
1930 return -errno;
1931
1932 if (fstat(fd, &st) < 0)
1933 return -errno;
1934
1935 s = source_new(e, !ret, SOURCE_INOTIFY);
1936 if (!s)
1937 return -ENOMEM;
1938
1939 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1940 s->inotify.mask = mask;
1941 s->inotify.callback = callback;
1942 s->userdata = userdata;
1943
1944 /* Allocate an inotify object for this priority, and an inode object within it */
1945 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1946 if (r < 0)
8c75fe17 1947 return r;
1948
1949 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1950 if (r < 0) {
1951 event_free_inotify_data(e, inotify_data);
1952 return r;
1953 }
1954
1955 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1956 * the event source until then; for that we need the original inode. */
1957 if (inode_data->fd < 0) {
1958 inode_data->fd = TAKE_FD(fd);
1959 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1960 }
1961
1962 /* Link our event source to the inode data object */
1963 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1964 s->inotify.inode_data = inode_data;
1965
1966 /* Actually realize the watch now */
1967 r = inode_data_realize_watch(e, inode_data);
1968 if (r < 0)
8c75fe17 1969 return r;
1970
1971 (void) sd_event_source_set_description(s, path);
1972
1973 if (ret)
1974 *ret = s;
8c75fe17 1975 TAKE_PTR(s);
1976
1977 return 0;
1978}
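
/* Usage sketch for the public API above (illustrative; the path and handler name are hypothetical,
 * error handling abbreviated): watch a directory for new files and print what shows up.
 *
 *     #include <stdio.h>
 *     #include <sys/inotify.h>
 *     #include <systemd/sd-event.h>
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->len > 0)
 *                     printf("mask=0x%x name=%s\n", ev->mask, ev->name);
 *             return 0;
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             sd_event_default(&e);
 *             sd_event_add_inotify(e, NULL, "/tmp/watched", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 *             sd_event_loop(e);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 *
 * Passing NULL for the source pointer makes the source "floating", i.e. owned by the loop. */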
1979
8301aa0b 1980static sd_event_source* event_source_free(sd_event_source *s) {
1981 if (!s)
1982 return NULL;
da7e457c 1983
1984 /* Here's a special hack: when we are called from a
1985 * dispatch handler we won't free the event source
1986 * immediately, but we will detach the fd from the
1987 * epoll. This way it is safe for the caller to unref
1988 * the event source and immediately close the fd, but
1989 * we still retain a valid event source object after
1990 * the callback. */
fd38203a 1991
1992 if (s->dispatching) {
1993 if (s->type == SOURCE_IO)
1994 source_io_unregister(s);
fd38203a 1995
1996 source_disconnect(s);
1997 } else
1998 source_free(s);
1999
2000 return NULL;
2001}
2002
2003DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
2004
356779df 2005_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
f7f53e9e 2006 assert_return(s, -EINVAL);
f4b2933e 2007 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2008
356779df 2009 return free_and_strdup(&s->description, description);
2010}
2011
356779df 2012_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
f7f53e9e 2013 assert_return(s, -EINVAL);
356779df 2014 assert_return(description, -EINVAL);
f4b2933e 2015 assert_return(!event_pid_changed(s->event), -ECHILD);
f7f53e9e 2016
2017 if (!s->description)
2018 return -ENXIO;
2019
356779df 2020 *description = s->description;
2021 return 0;
2022}
2023
adcc4ca3 2024_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
305f78bf 2025 assert_return(s, NULL);
2026
2027 return s->event;
2028}
2029
f7262a9f 2030_public_ int sd_event_source_get_pending(sd_event_source *s) {
305f78bf 2031 assert_return(s, -EINVAL);
6203e07a 2032 assert_return(s->type != SOURCE_EXIT, -EDOM);
da7e457c 2033 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2034 assert_return(!event_pid_changed(s->event), -ECHILD);
2035
2036 return s->pending;
2037}
2038
f7262a9f 2039_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2040 assert_return(s, -EINVAL);
2041 assert_return(s->type == SOURCE_IO, -EDOM);
2042 assert_return(!event_pid_changed(s->event), -ECHILD);
2043
2044 return s->io.fd;
2045}
2046
2047_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2048 int r;
2049
2050 assert_return(s, -EINVAL);
8ac43fee 2051 assert_return(fd >= 0, -EBADF);
2052 assert_return(s->type == SOURCE_IO, -EDOM);
2053 assert_return(!event_pid_changed(s->event), -ECHILD);
2054
2055 if (s->io.fd == fd)
2056 return 0;
2057
2058 if (s->enabled == SD_EVENT_OFF) {
2059 s->io.fd = fd;
2060 s->io.registered = false;
2061 } else {
2062 int saved_fd;
2063
2064 saved_fd = s->io.fd;
2065 assert(s->io.registered);
2066
2067 s->io.fd = fd;
2068 s->io.registered = false;
2069
2070 r = source_io_register(s, s->enabled, s->io.events);
2071 if (r < 0) {
2072 s->io.fd = saved_fd;
2073 s->io.registered = true;
2074 return r;
2075 }
2076
5a795bff 2077 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2078 }
2079
2080 return 0;
2081}
2082
2083_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2084 assert_return(s, -EINVAL);
2085 assert_return(s->type == SOURCE_IO, -EDOM);
2086
2087 return s->io.owned;
2088}
2089
2090_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2091 assert_return(s, -EINVAL);
2092 assert_return(s->type == SOURCE_IO, -EDOM);
2093
2094 s->io.owned = own;
2095 return 0;
2096}
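
/* Illustrative sketch (identifiers are hypothetical): with the ownership flag set via the setter
 * above, the event source closes the fd when it is freed, so the caller doesn't need to track it
 * separately:
 *
 *     int fd = accept4(listen_fd, NULL, NULL, SOCK_NONBLOCK|SOCK_CLOEXEC);
 *     sd_event_source *s;
 *
 *     sd_event_add_io(e, &s, fd, EPOLLIN, on_connection, NULL);
 *     sd_event_source_set_io_fd_own(s, true);
 *
 * From here on, freeing s also closes fd. */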
2097
f7262a9f 2098_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2099 assert_return(s, -EINVAL);
2100 assert_return(events, -EINVAL);
2101 assert_return(s->type == SOURCE_IO, -EDOM);
2102 assert_return(!event_pid_changed(s->event), -ECHILD);
2103
2104 *events = s->io.events;
2105 return 0;
2106}
2107
f7262a9f 2108_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2109 int r;
2110
2111 assert_return(s, -EINVAL);
2112 assert_return(s->type == SOURCE_IO, -EDOM);
2a16a986 2113 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
da7e457c 2114 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2115 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2116
2117 /* edge-triggered updates are never skipped, so we can reset edges */
2118 if (s->io.events == events && !(events & EPOLLET))
2119 return 0;
2120
2121 r = source_set_pending(s, false);
2122 if (r < 0)
2123 return r;
2124
baf76283 2125 if (s->enabled != SD_EVENT_OFF) {
e4715127 2126 r = source_io_register(s, s->enabled, events);
2127 if (r < 0)
2128 return r;
2129 }
2130
2131 s->io.events = events;
2132
2133 return 0;
2134}
2135
f7262a9f 2136_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2137 assert_return(s, -EINVAL);
2138 assert_return(revents, -EINVAL);
2139 assert_return(s->type == SOURCE_IO, -EDOM);
2140 assert_return(s->pending, -ENODATA);
2141 assert_return(!event_pid_changed(s->event), -ECHILD);
2142
2143 *revents = s->io.revents;
2144 return 0;
2145}
2146
f7262a9f 2147_public_ int sd_event_source_get_signal(sd_event_source *s) {
2148 assert_return(s, -EINVAL);
2149 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2150 assert_return(!event_pid_changed(s->event), -ECHILD);
2151
2152 return s->signal.sig;
2153}
2154
31927c16 2155_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2156 assert_return(s, -EINVAL);
2157 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2158
2159 *priority = s->priority;
2160 return 0;
2161}
2162
31927c16 2163_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2164 bool rm_inotify = false, rm_inode = false;
2165 struct inotify_data *new_inotify_data = NULL;
2166 struct inode_data *new_inode_data = NULL;
2167 int r;
2168
305f78bf 2169 assert_return(s, -EINVAL);
da7e457c 2170 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2171 assert_return(!event_pid_changed(s->event), -ECHILD);
2172
2173 if (s->priority == priority)
2174 return 0;
2175
2176 if (s->type == SOURCE_INOTIFY) {
2177 struct inode_data *old_inode_data;
2178
2179 assert(s->inotify.inode_data);
2180 old_inode_data = s->inotify.inode_data;
2181
2182 /* We need the original fd to change the priority. If we don't have it we can't change the priority
2183 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2184 * events we allow priority changes only until the first following iteration. */
2185 if (old_inode_data->fd < 0)
2186 return -EOPNOTSUPP;
2187
2188 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2189 if (r < 0)
2190 return r;
2191 rm_inotify = r > 0;
2192
2193 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2194 if (r < 0)
2195 goto fail;
2196 rm_inode = r > 0;
2197
2198 if (new_inode_data->fd < 0) {
2199 /* Duplicate the fd for the new inode object if we don't have any yet */
2200 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2201 if (new_inode_data->fd < 0) {
2202 r = -errno;
2203 goto fail;
2204 }
2205
2206 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2207 }
2208
2209 /* Move the event source to the new inode data structure */
2210 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2211 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2212 s->inotify.inode_data = new_inode_data;
2213
2214 /* Now create the new watch */
2215 r = inode_data_realize_watch(s->event, new_inode_data);
2216 if (r < 0) {
2217 /* Move it back */
2218 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2219 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2220 s->inotify.inode_data = old_inode_data;
2221 goto fail;
2222 }
2223
2224 s->priority = priority;
2225
2226 event_gc_inode_data(s->event, old_inode_data);
2227
2228 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2229 struct signal_data *old, *d;
2230
2231 /* Move us from the signalfd belonging to the old
2232 * priority to the signalfd of the new priority */
2233
2234 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2235
2236 s->priority = priority;
2237
2238 r = event_make_signal_data(s->event, s->signal.sig, &d);
2239 if (r < 0) {
2240 s->priority = old->priority;
2241 return r;
2242 }
2243
2244 event_unmask_signal_data(s->event, old, s->signal.sig);
2245 } else
2246 s->priority = priority;
fd38203a 2247
e1951c16 2248 event_source_pp_prioq_reshuffle(s);
fd38203a 2249
2250 if (s->type == SOURCE_EXIT)
2251 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 2252
fd38203a 2253 return 0;
2254
2255fail:
2256 if (rm_inode)
2257 event_free_inode_data(s->event, new_inode_data);
2258
2259 if (rm_inotify)
2260 event_free_inotify_data(s->event, new_inotify_data);
2261
2262 return r;
2263}
2264
cad143a8 2265_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
305f78bf 2266 assert_return(s, -EINVAL);
305f78bf 2267 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2268
2269 if (ret)
2270 *ret = s->enabled;
2271
08c1eb0e 2272 return s->enabled != SD_EVENT_OFF;
2273}
2274
ddfde737 2275static int event_source_disable(sd_event_source *s) {
2276 int r;
2277
2278 assert(s);
2279 assert(s->enabled != SD_EVENT_OFF);
fd38203a 2280
2281 /* Unset the pending flag when this event source is disabled */
2282 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2283 r = source_set_pending(s, false);
2284 if (r < 0)
2285 return r;
2286 }
cc567911 2287
ddfde737 2288 s->enabled = SD_EVENT_OFF;
fd38203a 2289
ddfde737 2290 switch (s->type) {
fd38203a 2291
2292 case SOURCE_IO:
2293 source_io_unregister(s);
2294 break;
ac989a78 2295
2296 case SOURCE_TIME_REALTIME:
2297 case SOURCE_TIME_BOOTTIME:
2298 case SOURCE_TIME_MONOTONIC:
2299 case SOURCE_TIME_REALTIME_ALARM:
2300 case SOURCE_TIME_BOOTTIME_ALARM:
2301 event_source_time_prioq_reshuffle(s);
2302 break;
fd38203a 2303
2304 case SOURCE_SIGNAL:
2305 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2306 break;
fd38203a 2307
2308 case SOURCE_CHILD:
2309 assert(s->event->n_enabled_child_sources > 0);
2310 s->event->n_enabled_child_sources--;
fd38203a 2311
2312 if (EVENT_SOURCE_WATCH_PIDFD(s))
2313 source_child_pidfd_unregister(s);
2314 else
2315 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2316 break;
4807d2d0 2317
2318 case SOURCE_EXIT:
2319 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2320 break;
fd38203a 2321
2322 case SOURCE_DEFER:
2323 case SOURCE_POST:
2324 case SOURCE_INOTIFY:
2325 break;
fd38203a 2326
2327 default:
2328 assert_not_reached("Wut? I shouldn't exist.");
2329 }
fd38203a 2330
2331 return 0;
2332}
f8f3f926 2333
d2eafe61 2334static int event_source_enable(sd_event_source *s, int enable) {
ddfde737 2335 int r;
fd38203a 2336
ddfde737 2337 assert(s);
d2eafe61 2338 assert(IN_SET(enable, SD_EVENT_ON, SD_EVENT_ONESHOT));
ddfde737 2339 assert(s->enabled == SD_EVENT_OFF);
305f78bf 2340
2341 /* Unset the pending flag when this event source is enabled */
2342 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2343 r = source_set_pending(s, false);
2344 if (r < 0)
2345 return r;
2346 }
9d3e3aa5 2347
ddfde737 2348 switch (s->type) {
ddfde737 2349 case SOURCE_IO:
2350 r = source_io_register(s, enable, s->io.events);
2351 if (r < 0)
ddfde737 2352 return r;
ddfde737 2353 break;
fd38203a 2354
2355 case SOURCE_SIGNAL:
2356 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2357 if (r < 0) {
2358 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2359 return r;
2360 }
fd38203a 2361
ddfde737 2362 break;
fd38203a 2363
ddfde737 2364 case SOURCE_CHILD:
2365 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2366 /* yes, we have pidfd */
9da4cb2b 2367
d2eafe61 2368 r = source_child_pidfd_register(s, enable);
ac9f2640 2369 if (r < 0)
9da4cb2b 2370 return r;
2371 } else {
2372 /* no pidfd, or something other to watch for than WEXITED */
9da4cb2b 2373
2374 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2375 if (r < 0) {
2376 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2377 return r;
2378 }
2379 }
fd38203a 2380
2381 s->event->n_enabled_child_sources++;
2382
ddfde737 2383 break;
4807d2d0 2384
2385 case SOURCE_TIME_REALTIME:
2386 case SOURCE_TIME_BOOTTIME:
2387 case SOURCE_TIME_MONOTONIC:
2388 case SOURCE_TIME_REALTIME_ALARM:
2389 case SOURCE_TIME_BOOTTIME_ALARM:
ddfde737 2390 case SOURCE_EXIT:
2391 case SOURCE_DEFER:
2392 case SOURCE_POST:
2393 case SOURCE_INOTIFY:
2394 break;
9da4cb2b 2395
2396 default:
2397 assert_not_reached("Wut? I shouldn't exist.");
2398 }
f8f3f926 2399
2400 s->enabled = enable;
2401
2402 /* Non-failing operations below */
2403 switch (s->type) {
2404 case SOURCE_TIME_REALTIME:
2405 case SOURCE_TIME_BOOTTIME:
2406 case SOURCE_TIME_MONOTONIC:
2407 case SOURCE_TIME_REALTIME_ALARM:
2408 case SOURCE_TIME_BOOTTIME_ALARM:
2409 event_source_time_prioq_reshuffle(s);
2410 break;
2411
2412 case SOURCE_EXIT:
2413 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2414 break;
2415
2416 default:
2417 break;
2418 }
2419
2420 return 0;
2421}
2422
2423_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2424 int r;
9da4cb2b 2425
2426 assert_return(s, -EINVAL);
2427 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2428 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2429
2430 /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
2431 if (s->event->state == SD_EVENT_FINISHED)
2432 return m == SD_EVENT_OFF ? 0 : -ESTALE;
305f78bf 2433
2434 if (s->enabled == m) /* No change? */
2435 return 0;
9d3e3aa5 2436
2437 if (m == SD_EVENT_OFF)
2438 r = event_source_disable(s);
2439 else {
2440 if (s->enabled != SD_EVENT_OFF) {
2441 /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
2442 * event source is already enabled after all. */
2443 s->enabled = m;
2444 return 0;
fd38203a 2445 }
2446
2447 r = event_source_enable(s, m);
fd38203a 2448 }
2449 if (r < 0)
2450 return r;
fd38203a 2451
e1951c16 2452 event_source_pp_prioq_reshuffle(s);
2453 return 0;
2454}
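
/* Quick reference for the setter above (illustrative): SD_EVENT_ONESHOT sources are dispatched at
 * most once more, and are then switched to SD_EVENT_OFF automatically by source_dispatch().
 *
 *     sd_event_source_set_enabled(s, SD_EVENT_OFF);
 *     sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *     sd_event_source_set_enabled(s, SD_EVENT_ON);
 *
 * The first keeps the source around but stops dispatching it, the second dispatches once and then
 * auto-disables, the third dispatches whenever the source triggers. */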
2455
f7262a9f 2456_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2457 assert_return(s, -EINVAL);
2458 assert_return(usec, -EINVAL);
6a0f1f6d 2459 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
305f78bf 2460 assert_return(!event_pid_changed(s->event), -ECHILD);
2461
2462 *usec = s->time.next;
2463 return 0;
2464}
2465
f7262a9f 2466_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2467 int r;
6a0f1f6d 2468
305f78bf 2469 assert_return(s, -EINVAL);
6a0f1f6d 2470 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2471 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2472 assert_return(!event_pid_changed(s->event), -ECHILD);
fd38203a 2473
2474 r = source_set_pending(s, false);
2475 if (r < 0)
2476 return r;
2576a19e 2477
2a0dc6cd 2478 s->time.next = usec;
fd38203a 2479
e1951c16 2480 event_source_time_prioq_reshuffle(s);
2481 return 0;
2482}
2483
2484_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2485 usec_t t;
2486 int r;
2487
2488 assert_return(s, -EINVAL);
2489 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2490
2491 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2492 if (r < 0)
2493 return r;
2494
2495 if (usec >= USEC_INFINITY - t)
2496 return -EOVERFLOW;
2497
2498 return sd_event_source_set_time(s, t + usec);
2499}
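
/* Usage sketch (hypothetical handler name): a timer that re-arms itself 5 seconds into the future
 * on every dispatch. Since oneshot sources are auto-disabled before the callback runs, the handler
 * re-enables the source after moving the deadline:
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             (void) sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
 *             (void) sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *             return 0;
 *     }
 *
 * The initial arming would be done with sd_event_add_time() on CLOCK_MONOTONIC. */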
2500
f7262a9f 2501_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2502 assert_return(s, -EINVAL);
2503 assert_return(usec, -EINVAL);
6a0f1f6d 2504 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2505 assert_return(!event_pid_changed(s->event), -ECHILD);
2506
2507 *usec = s->time.accuracy;
2508 return 0;
2509}
2510
f7262a9f 2511_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2a0dc6cd 2512 int r;
6a0f1f6d 2513
2514 assert_return(s, -EINVAL);
2515 assert_return(usec != (uint64_t) -1, -EINVAL);
6a0f1f6d 2516 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
da7e457c 2517 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
305f78bf 2518 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2519
2520 r = source_set_pending(s, false);
2521 if (r < 0)
2522 return r;
2523
2524 if (usec == 0)
2525 usec = DEFAULT_ACCURACY_USEC;
2526
2527 s->time.accuracy = usec;
2528
e1951c16 2529 event_source_time_prioq_reshuffle(s);
2530 return 0;
2531}
2532
2533_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2534 assert_return(s, -EINVAL);
2535 assert_return(clock, -EINVAL);
2536 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2537 assert_return(!event_pid_changed(s->event), -ECHILD);
eaa3cbef 2538
6a0f1f6d 2539 *clock = event_source_type_to_clock(s->type);
2540 return 0;
2541}
2542
f7262a9f 2543_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2544 assert_return(s, -EINVAL);
2545 assert_return(pid, -EINVAL);
2546 assert_return(s->type == SOURCE_CHILD, -EDOM);
2547 assert_return(!event_pid_changed(s->event), -ECHILD);
2548
2549 *pid = s->child.pid;
2550 return 0;
2551}
2552
2553_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2554 assert_return(s, -EINVAL);
2555 assert_return(s->type == SOURCE_CHILD, -EDOM);
2556 assert_return(!event_pid_changed(s->event), -ECHILD);
2557
2558 if (s->child.pidfd < 0)
2559 return -EOPNOTSUPP;
2560
2561 return s->child.pidfd;
2562}
2563
2564_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2565 assert_return(s, -EINVAL);
2566 assert_return(s->type == SOURCE_CHILD, -EDOM);
2567 assert_return(!event_pid_changed(s->event), -ECHILD);
2568 assert_return(SIGNAL_VALID(sig), -EINVAL);
2569
2570 /* If we already have seen indication the process exited refuse sending a signal early. This way we
2571 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2572 * available. */
2573 if (s->child.exited)
2574 return -ESRCH;
2575
2576 if (s->child.pidfd >= 0) {
2577 siginfo_t copy;
2578
2579 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2580 * structure here */
2581 if (si)
2582 copy = *si;
2583
2584 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2585 /* Let's propagate the error only if the system call is not implemented or prohibited */
2586 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2587 return -errno;
2588 } else
2589 return 0;
2590 }
2591
2592 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2593 * this here. */
2594 if (flags != 0)
2595 return -EOPNOTSUPP;
2596
2597 if (si) {
2598 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2599 siginfo_t copy = *si;
2600
2601 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2602 return -errno;
2603 } else if (kill(s->child.pid, sig) < 0)
2604 return -errno;
2605
2606 return 0;
2607}
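
/* Usage sketch (pid and handler name are hypothetical): politely terminate a watched child. Thanks
 * to the pidfd path above this cannot hit a recycled PID, and it fails with -ESRCH once the child
 * is already known to have exited. Note that sd_event_add_child() requires SIGCHLD to be blocked in
 * the calling thread:
 *
 *     sd_event_source *s;
 *     int r;
 *
 *     sd_event_add_child(e, &s, pid, WEXITED, on_child_exit, NULL);
 *     r = sd_event_source_send_child_signal(s, SIGTERM, NULL, 0);
 *     if (r == -ESRCH) {
 *             ;
 *     }
 *
 * In the -ESRCH case the child already exited and the exit handler will run (or already ran). */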
2608
2609_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2610 assert_return(s, -EINVAL);
2611 assert_return(s->type == SOURCE_CHILD, -EDOM);
2612
2613 if (s->child.pidfd < 0)
2614 return -EOPNOTSUPP;
2615
2616 return s->child.pidfd_owned;
2617}
2618
2619_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2620 assert_return(s, -EINVAL);
2621 assert_return(s->type == SOURCE_CHILD, -EDOM);
2622
2623 if (s->child.pidfd < 0)
2624 return -EOPNOTSUPP;
2625
2626 s->child.pidfd_owned = own;
2627 return 0;
2628}
2629
2630_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2631 assert_return(s, -EINVAL);
2632 assert_return(s->type == SOURCE_CHILD, -EDOM);
2633
2634 return s->child.process_owned;
2635}
2636
2637_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2638 assert_return(s, -EINVAL);
2639 assert_return(s->type == SOURCE_CHILD, -EDOM);
2640
2641 s->child.process_owned = own;
2642 return 0;
2643}
2644
2645_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2646 assert_return(s, -EINVAL);
2647 assert_return(mask, -EINVAL);
2648 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2649 assert_return(!event_pid_changed(s->event), -ECHILD);
2650
2651 *mask = s->inotify.mask;
2652 return 0;
2653}
2654
718db961 2655_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2656 int r;
2657
da7e457c 2658 assert_return(s, -EINVAL);
6203e07a 2659 assert_return(s->type != SOURCE_EXIT, -EDOM);
2660 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2661 assert_return(!event_pid_changed(s->event), -ECHILD);
2662
2663 if (s->prepare == callback)
2664 return 0;
2665
2666 if (callback && s->prepare) {
2667 s->prepare = callback;
2668 return 0;
2669 }
2670
2671 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2672 if (r < 0)
2673 return r;
2674
2675 s->prepare = callback;
2676
2677 if (callback) {
2678 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2679 if (r < 0)
2680 return r;
2681 } else
2682 prioq_remove(s->event->prepare, s, &s->prepare_index);
2683
2684 return 0;
2685}
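
/* Usage sketch (names are hypothetical): a prepare callback runs right before the loop polls, which
 * is handy for flushing application-level output buffers that would otherwise keep data queued
 * while the loop sleeps:
 *
 *     static int on_prepare(sd_event_source *s, void *userdata) {
 *             struct conn *c = userdata;
 *
 *             return flush_pending_writes(c);
 *     }
 *
 *     sd_event_source_set_prepare(io_source, on_prepare);
 */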
2686
f7262a9f 2687_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
da7e457c 2688 assert_return(s, NULL);
2689
2690 return s->userdata;
2691}
2692
2693_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2694 void *ret;
2695
2696 assert_return(s, NULL);
2697
2698 ret = s->userdata;
2699 s->userdata = userdata;
2700
2701 return ret;
2702}
2703
2704static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2705 usec_t c;
2706 assert(e);
2707 assert(a <= b);
2708
2709 if (a <= 0)
2710 return 0;
2711 if (a >= USEC_INFINITY)
2712 return USEC_INFINITY;
2713
2714 if (b <= a + 1)
2715 return a;
2716
2717 initialize_perturb(e);
2718
2719 /*
2720 Find a good time to wake up again between times a and b. We
2721 have two goals here:
2722
2723 a) We want to wake up as seldom as possible, hence prefer
2724 later times over earlier times.
2725
2726 b) But if we have to wake up, then let's make sure to
2727 dispatch as much as possible on the entire system.
2728
2729 We implement this by waking up everywhere at the same time
850516e0 2730 within any given minute if we can, synchronised via the
c2ba3ad6 2731 perturbation value determined from the boot ID. If we can't,
2732 then we try to find the same spot in every 10s, then 1s and
2733 then 250ms steps. Otherwise, we pick the last possible time
2734 to wake up.
2735 */
2736
2737 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2738 if (c >= b) {
2739 if (_unlikely_(c < USEC_PER_MINUTE))
2740 return b;
2741
2742 c -= USEC_PER_MINUTE;
2743 }
2744
2745 if (c >= a)
2746 return c;
2747
2748 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2749 if (c >= b) {
2750 if (_unlikely_(c < USEC_PER_SEC*10))
2751 return b;
2752
2753 c -= USEC_PER_SEC*10;
2754 }
2755
2756 if (c >= a)
2757 return c;
2758
2759 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2760 if (c >= b) {
2761 if (_unlikely_(c < USEC_PER_SEC))
2762 return b;
2763
2764 c -= USEC_PER_SEC;
2765 }
2766
2767 if (c >= a)
2768 return c;
2769
2770 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2771 if (c >= b) {
2772 if (_unlikely_(c < USEC_PER_MSEC*250))
2773 return b;
2774
2775 c -= USEC_PER_MSEC*250;
2776 }
2777
2778 if (c >= a)
2779 return c;
2780
2781 return b;
2782}
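
/* Worked example for the algorithm above (numbers invented for illustration): assume e->perturb is
 * 7.5s and the permissible window is a = 65s, b = 130s (absolute times). At minute granularity the
 * candidate is c = floor(130s / 60s) * 60s + 7.5s = 127.5s; that is below b and at or above a, so
 * we wake at 127.5s. Every loop on this machine derives the same boot-ID-based perturbation, so
 * their wakeups cluster at the same spot within the minute. Had c fallen outside [a, b) we would
 * have retried at 10s, 1s and 250ms granularity, and as a last resort returned b itself. */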
2783
2784static int event_arm_timer(
2785 sd_event *e,
6a0f1f6d 2786 struct clock_data *d) {
2787
2788 struct itimerspec its = {};
2789 sd_event_source *a, *b;
2790 usec_t t;
fd38203a 2791
cde93897 2792 assert(e);
6a0f1f6d 2793 assert(d);
fd38203a 2794
d06441da 2795 if (!d->needs_rearm)
2796 return 0;
2797 else
2798 d->needs_rearm = false;
2799
6a0f1f6d 2800 a = prioq_peek(d->earliest);
393003e1 2801 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
72aedc1e 2802
6a0f1f6d 2803 if (d->fd < 0)
2804 return 0;
2805
3a43da28 2806 if (d->next == USEC_INFINITY)
2807 return 0;
2808
2809 /* disarm */
2810 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
2811 return -errno;
72aedc1e 2812
3a43da28 2813 d->next = USEC_INFINITY;
fd38203a 2814 return 0;
72aedc1e 2815 }
fd38203a 2816
6a0f1f6d 2817 b = prioq_peek(d->latest);
baf76283 2818 assert_se(b && b->enabled != SD_EVENT_OFF);
c2ba3ad6 2819
1bce0ffa 2820 t = sleep_between(e, a->time.next, time_event_source_latest(b));
6a0f1f6d 2821 if (d->next == t)
2822 return 0;
2823
6a0f1f6d 2824 assert_se(d->fd >= 0);
fd38203a 2825
c2ba3ad6 2826 if (t == 0) {
2827 /* We don't want to disarm here, we just arm it for some time looooong ago instead. */
2828 its.it_value.tv_sec = 0;
2829 its.it_value.tv_nsec = 1;
2830 } else
c2ba3ad6 2831 timespec_store(&its.it_value, t);
fd38203a 2832
15c689d7 2833 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
cde93897 2834 return -errno;
fd38203a 2835
6a0f1f6d 2836 d->next = t;
2837 return 0;
2838}
2839
9a800b56 2840static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2841 assert(e);
2842 assert(s);
2843 assert(s->type == SOURCE_IO);
2844
2845 /* If the event source was already pending, we just OR in the
2846 * new revents, otherwise we reset the value. The ORing is
2847 * necessary to handle EPOLLONESHOT events properly where
2848 * readability might happen independently of writability, and
2849 * we need to keep track of both */
2850
2851 if (s->pending)
2852 s->io.revents |= revents;
2853 else
2854 s->io.revents = revents;
fd38203a 2855
2856 return source_set_pending(s, true);
2857}
2858
72aedc1e 2859static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2860 uint64_t x;
2861 ssize_t ss;
2862
2863 assert(e);
da7e457c 2864 assert(fd >= 0);
72aedc1e 2865
305f78bf 2866 assert_return(events == EPOLLIN, -EIO);
2867
2868 ss = read(fd, &x, sizeof(x));
2869 if (ss < 0) {
945c2931 2870 if (IN_SET(errno, EAGAIN, EINTR))
2871 return 0;
2872
2873 return -errno;
2874 }
2875
8d35dae7 2876 if (_unlikely_(ss != sizeof(x)))
2877 return -EIO;
2878
cde93897 2879 if (next)
3a43da28 2880 *next = USEC_INFINITY;
72aedc1e 2881
2882 return 0;
2883}
2884
2885static int process_timer(
2886 sd_event *e,
2887 usec_t n,
6a0f1f6d 2888 struct clock_data *d) {
305f78bf 2889
2890 sd_event_source *s;
2891 int r;
2892
2893 assert(e);
6a0f1f6d 2894 assert(d);
2895
2896 for (;;) {
6a0f1f6d 2897 s = prioq_peek(d->earliest);
2898 if (!s ||
2899 s->time.next > n ||
baf76283 2900 s->enabled == SD_EVENT_OFF ||
2901 s->pending)
2902 break;
2903
2904 r = source_set_pending(s, true);
2905 if (r < 0)
2906 return r;
2907
e1951c16 2908 event_source_time_prioq_reshuffle(s);
2909 }
2910
2911 return 0;
2912}
2913
2914static int process_child(sd_event *e) {
2915 sd_event_source *s;
2916 int r;
2917
2918 assert(e);
2919
2920 e->need_process_child = false;
2921
2922 /*
2923 So, this is ugly. We iteratively invoke waitid() with P_PID
2924 + WNOHANG for each PID we wait for, instead of using
2925 P_ALL. This is because we only want to get child
2926 information of very specific child processes, and not all
2927 of them. We might not have processed the SIGCHLD event of a
2928 previous invocation and we don't want to maintain an
2929 unbounded *per-child* event queue, hence we really don't
2930 want anything flushed out of the kernel's queue that we
2931 don't care about. Since this is O(n) this means that if you
2932 have a lot of processes you probably want to handle SIGCHLD
2933 yourself.
2934
2935 We do not reap the children here (by using WNOWAIT), this
2936 is only done after the event source is dispatched so that
2937 the callback still sees the process as a zombie.
2938 */
2939
90e74a66 2940 HASHMAP_FOREACH(s, e->child_sources) {
2941 assert(s->type == SOURCE_CHILD);
2942
2943 if (s->pending)
2944 continue;
2945
baf76283 2946 if (s->enabled == SD_EVENT_OFF)
2947 continue;
2948
2949 if (s->child.exited)
2950 continue;
2951
2952 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? Then don't waitid() for it here */
2953 continue;
2954
fd38203a 2955 zero(s->child.siginfo);
2956 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
2957 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
2958 return -errno;
2959
2960 if (s->child.siginfo.si_pid != 0) {
945c2931 2961 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 2962
2963 if (zombie)
2964 s->child.exited = true;
2965
2966 if (!zombie && (s->child.options & WEXITED)) {
2967 /* If the child isn't dead then let's
2968 * immediately remove the state change
2969 * from the queue, since there's no
2970 * benefit in leaving it queued */
2971
2972 assert(s->child.options & (WSTOPPED|WCONTINUED));
a5d27871 2973 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2974 }
2975
2976 r = source_set_pending(s, true);
2977 if (r < 0)
2978 return r;
2979 }
2980 }
2981
2982 return 0;
2983}
2984
2985static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2986 assert(e);
2987 assert(s);
2988 assert(s->type == SOURCE_CHILD);
2989
2990 if (s->pending)
2991 return 0;
2992
2993 if (s->enabled == SD_EVENT_OFF)
2994 return 0;
2995
2996 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2997 return 0;
2998
2999 zero(s->child.siginfo);
3000 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3001 return -errno;
3002
3003 if (s->child.siginfo.si_pid == 0)
3004 return 0;
3005
3006 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
3007 s->child.exited = true;
3008
3009 return source_set_pending(s, true);
3010}
3011
9da4cb2b 3012static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
fd38203a 3013 bool read_one = false;
3014 int r;
3015
da7e457c 3016 assert(e);
97ef5391 3017 assert(d);
305f78bf 3018 assert_return(events == EPOLLIN, -EIO);
fd38203a 3019
3020 /* If there's a signal queued on this priority and SIGCHLD is
3021 on this priority too, then make sure to recheck the
3022 children we watch. This is because we only ever dequeue
3023 the first signal per priority, and if we dequeue one while
3024 SIGCHLD is enqueued right behind it we wouldn't notice; but we
3025 might have higher-priority children we care about, hence we
3026 need to check for them explicitly. */
3027
3028 if (sigismember(&d->sigset, SIGCHLD))
3029 e->need_process_child = true;
3030
3031 /* If there's already an event source pending for this
3032 * priority we don't read another */
3033 if (d->current)
3034 return 0;
3035
fd38203a 3036 for (;;) {
0eb2e0e3 3037 struct signalfd_siginfo si;
7057bd99 3038 ssize_t n;
92daebc0 3039 sd_event_source *s = NULL;
fd38203a 3040
9da4cb2b 3041 n = read(d->fd, &si, sizeof(si));
7057bd99 3042 if (n < 0) {
945c2931 3043 if (IN_SET(errno, EAGAIN, EINTR))
3044 return read_one;
3045
3046 return -errno;
3047 }
3048
7057bd99 3049 if (_unlikely_(n != sizeof(si)))
3050 return -EIO;
3051
6eb7c172 3052 assert(SIGNAL_VALID(si.ssi_signo));
7057bd99 3053
3054 read_one = true;
3055
3056 if (e->signal_sources)
3057 s = e->signal_sources[si.ssi_signo];
3058 if (!s)
3059 continue;
3060 if (s->pending)
3061 continue;
3062
3063 s->signal.siginfo = si;
3064 d->current = s;
3065
3066 r = source_set_pending(s, true);
3067 if (r < 0)
3068 return r;
3069
3070 return 1;
fd38203a 3071 }
3072}
3073
3074static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3075 ssize_t n;
3076
3077 assert(e);
3078 assert(d);
3079
3080 assert_return(revents == EPOLLIN, -EIO);
3081
3082 /* If there's already an event source pending for this priority, don't read another */
3083 if (d->n_pending > 0)
3084 return 0;
3085
3086 /* Is the read buffer non-empty? If so, let's not read more */
3087 if (d->buffer_filled > 0)
3088 return 0;
3089
3090 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3091 if (n < 0) {
3092 if (IN_SET(errno, EAGAIN, EINTR))
3093 return 0;
3094
3095 return -errno;
3096 }
3097
3098 assert(n > 0);
3099 d->buffer_filled = (size_t) n;
3100 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3101
3102 return 1;
3103}
3104
3105static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3106 assert(e);
3107 assert(d);
3108 assert(sz <= d->buffer_filled);
3109
3110 if (sz == 0)
3111 return;
3112
3113 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3114 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3115 d->buffer_filled -= sz;
3116
3117 if (d->buffer_filled == 0)
3118 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3119}
3120
3121static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3122 int r;
3123
3124 assert(e);
3125 assert(d);
3126
3127 /* If there's already an event source pending for this priority, don't read another */
3128 if (d->n_pending > 0)
3129 return 0;
3130
3131 while (d->buffer_filled > 0) {
3132 size_t sz;
3133
3134 /* Let's validate that the event structures are complete */
3135 if (d->buffer_filled < offsetof(struct inotify_event, name))
3136 return -EIO;
3137
3138 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3139 if (d->buffer_filled < sz)
3140 return -EIO;
3141
3142 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3143 struct inode_data *inode_data;
3144
3145 /* The queue overran, let's pass this event to all event sources connected to this inotify
3146 * object */
3147
90e74a66 3148 HASHMAP_FOREACH(inode_data, d->inodes) {
3149 sd_event_source *s;
3150
3151 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3152
3153 if (s->enabled == SD_EVENT_OFF)
3154 continue;
3155
3156 r = source_set_pending(s, true);
3157 if (r < 0)
3158 return r;
3159 }
3160 }
3161 } else {
3162 struct inode_data *inode_data;
3163 sd_event_source *s;
3164
3165 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3166 * our watch descriptor table. */
3167 if (d->buffer.ev.mask & IN_IGNORED) {
3168
3169 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3170 if (!inode_data) {
3171 event_inotify_data_drop(e, d, sz);
3172 continue;
3173 }
3174
3175 /* The watch descriptor was removed by the kernel, let's drop it here too */
3176 inode_data->wd = -1;
3177 } else {
3178 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3179 if (!inode_data) {
3180 event_inotify_data_drop(e, d, sz);
3181 continue;
3182 }
3183 }
3184
3185 /* Trigger all event sources that are interested in these events. Also trigger all event
3186 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3187 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3188
3189 if (s->enabled == SD_EVENT_OFF)
3190 continue;
3191
3192 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3193 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3194 continue;
3195
3196 r = source_set_pending(s, true);
3197 if (r < 0)
3198 return r;
3199 }
3200 }
3201
3202 /* Something pending now? If so, let's finish, otherwise let's read more. */
3203 if (d->n_pending > 0)
3204 return 1;
3205 }
3206
3207 return 0;
3208}
3209
3210static int process_inotify(sd_event *e) {
3211 struct inotify_data *d;
3212 int r, done = 0;
3213
3214 assert(e);
3215
3216 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3217 r = event_inotify_data_process(e, d);
3218 if (r < 0)
3219 return r;
3220 if (r > 0)
3221 done ++;
3222 }
3223
3224 return done;
3225}
3226
fd38203a 3227static int source_dispatch(sd_event_source *s) {
b778cba4 3228 _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
8f5c235d 3229 EventSourceType saved_type;
fe8245eb 3230 int r = 0;
3231
3232 assert(s);
6203e07a 3233 assert(s->pending || s->type == SOURCE_EXIT);
fd38203a 3234
3235 /* Save the event source type here, so that we still know it after the event callback, which might
3236 * invalidate the event. */
3237 saved_type = s->type;
3238
3239 /* Similarly, store a reference to the event loop object, so that we can still access it after the
3240 * callback might have invalidated/disconnected the event source. */
3241 saved_event = sd_event_ref(s->event);
3242
945c2931 3243 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3244 r = source_set_pending(s, false);
3245 if (r < 0)
3246 return r;
3247 }
fd38203a 3248
3249 if (s->type != SOURCE_POST) {
3250 sd_event_source *z;
3251
3252 /* If we execute a non-post source, let's mark all
3253 * post sources as pending */
3254
90e74a66 3255 SET_FOREACH(z, s->event->post_sources) {
3256 if (z->enabled == SD_EVENT_OFF)
3257 continue;
3258
3259 r = source_set_pending(z, true);
3260 if (r < 0)
3261 return r;
3262 }
3263 }
3264
3265 if (s->enabled == SD_EVENT_ONESHOT) {
3266 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3267 if (r < 0)
3268 return r;
3269 }
3270
12179984 3271 s->dispatching = true;
b7484e2a 3272
3273 switch (s->type) {
3274
3275 case SOURCE_IO:
3276 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3277 break;
3278
6a0f1f6d 3279 case SOURCE_TIME_REALTIME:
a8548816 3280 case SOURCE_TIME_BOOTTIME:
3281 case SOURCE_TIME_MONOTONIC:
3282 case SOURCE_TIME_REALTIME_ALARM:
3283 case SOURCE_TIME_BOOTTIME_ALARM:
3284 r = s->time.callback(s, s->time.next, s->userdata);
3285 break;
3286
3287 case SOURCE_SIGNAL:
3288 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3289 break;
3290
3291 case SOURCE_CHILD: {
3292 bool zombie;
3293
945c2931 3294 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
08cd1552 3295
fd38203a 3296 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3297
3298 /* Now, reap the PID for good. */
f8f3f926 3299 if (zombie) {
cc59d290 3300 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3301 s->child.waited = true;
3302 }
08cd1552 3303
fd38203a 3304 break;
08cd1552 3305 }
3306
3307 case SOURCE_DEFER:
3308 r = s->defer.callback(s, s->userdata);
3309 break;
da7e457c 3310
3311 case SOURCE_POST:
3312 r = s->post.callback(s, s->userdata);
3313 break;
3314
3315 case SOURCE_EXIT:
3316 r = s->exit.callback(s, s->userdata);
da7e457c 3317 break;
9d3e3aa5 3318
3319 case SOURCE_INOTIFY: {
3320 struct sd_event *e = s->event;
3321 struct inotify_data *d;
3322 size_t sz;
3323
3324 assert(s->inotify.inode_data);
3325 assert_se(d = s->inotify.inode_data->inotify_data);
3326
3327 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3328 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3329 assert(d->buffer_filled >= sz);
3330
3331 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3332
3333 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3334 * buffer. */
3335 if (d->n_pending == 0)
3336 event_inotify_data_drop(e, d, sz);
3337
3338 break;
3339 }
3340
9d3e3aa5 3341 case SOURCE_WATCHDOG:
a71fe8b8 3342 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
9f2a50a3 3343 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
9d3e3aa5 3344 assert_not_reached("Wut? I shouldn't exist.");
3345 }
3346
3347 s->dispatching = false;
3348
3349 if (r < 0) {
3350 log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
3351 strna(s->description),
3352 event_source_type_to_string(saved_type),
3353 s->exit_on_failure ? "exiting" : "disabling");
3354
3355 if (s->exit_on_failure)
3356 (void) sd_event_exit(saved_event, r);
3357 }
3358
3359 if (s->n_ref == 0)
3360 source_free(s);
3361 else if (r < 0)
6203e07a 3362 sd_event_source_set_enabled(s, SD_EVENT_OFF);
b7484e2a 3363
6203e07a 3364 return 1;
3365}
3366
3367static int event_prepare(sd_event *e) {
3368 int r;
3369
3370 assert(e);
3371
3372 for (;;) {
3373 sd_event_source *s;
3374
3375 s = prioq_peek(e->prepare);
baf76283 3376 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3377 break;
3378
3379 s->prepare_iteration = e->iteration;
3380 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3381 if (r < 0)
3382 return r;
3383
3384 assert(s->prepare);
3385
3386 s->dispatching = true;
fd38203a 3387 r = s->prepare(s, s->userdata);
3388 s->dispatching = false;
3389
3390 if (r < 0) {
3391 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
3392 strna(s->description),
3393 event_source_type_to_string(s->type),
3394 s->exit_on_failure ? "exiting" : "disabling");
3395
3396 if (s->exit_on_failure)
3397 (void) sd_event_exit(e, r);
3398 }
fd38203a 3399
3400 if (s->n_ref == 0)
3401 source_free(s);
3402 else if (r < 0)
3403 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3404 }
3405
3406 return 0;
3407}
3408
6203e07a 3409static int dispatch_exit(sd_event *e) {
3410 sd_event_source *p;
3411 int r;
3412
3413 assert(e);
3414
6203e07a 3415 p = prioq_peek(e->exit);
baf76283 3416 if (!p || p->enabled == SD_EVENT_OFF) {
3417 e->state = SD_EVENT_FINISHED;
3418 return 0;
3419 }
3420
f814c871 3421 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
da7e457c 3422 e->iteration++;
6203e07a 3423 e->state = SD_EVENT_EXITING;
da7e457c 3424 r = source_dispatch(p);
2b0c9ef7 3425 e->state = SD_EVENT_INITIAL;
3426 return r;
3427}
3428
3429static sd_event_source* event_next_pending(sd_event *e) {
3430 sd_event_source *p;
3431
3432 assert(e);
3433
3434 p = prioq_peek(e->pending);
3435 if (!p)
3436 return NULL;
3437
baf76283 3438 if (p->enabled == SD_EVENT_OFF)
3439 return NULL;
3440
3441 return p;
3442}
3443
3444static int arm_watchdog(sd_event *e) {
3445 struct itimerspec its = {};
3446 usec_t t;
3447
3448 assert(e);
3449 assert(e->watchdog_fd >= 0);
3450
3451 t = sleep_between(e,
3452 e->watchdog_last + (e->watchdog_period / 2),
3453 e->watchdog_last + (e->watchdog_period * 3 / 4));
3454
3455 timespec_store(&its.it_value, t);
3456
3457 /* Make sure we never set the watchdog to 0, which tells the
3458 * kernel to disable it. */
3459 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3460 its.it_value.tv_nsec = 1;
3461
15c689d7 3462 if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3463 return -errno;
3464
3465 return 0;
3466}
3467
3468static int process_watchdog(sd_event *e) {
3469 assert(e);
3470
3471 if (!e->watchdog)
3472 return 0;
3473
3474 /* Don't notify watchdog too often */
3475 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3476 return 0;
3477
3478 sd_notify(false, "WATCHDOG=1");
3479 e->watchdog_last = e->timestamp.monotonic;
3480
3481 return arm_watchdog(e);
3482}
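
/* Usage sketch: the machinery above is armed via sd_event_set_watchdog(). When the service manager
 * supplies $WATCHDOG_USEC (e.g. through WatchdogSec= in the unit file), the loop then emits
 * sd_notify("WATCHDOG=1") on its own, at roughly half to three quarters of the period as computed
 * in arm_watchdog() above:
 *
 *     sd_event *e;
 *
 *     sd_event_default(&e);
 *     sd_event_set_watchdog(e, true);
 *
 * If $WATCHDOG_USEC is not set this is effectively a no-op. */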
3483
3484static void event_close_inode_data_fds(sd_event *e) {
3485 struct inode_data *d;
3486
3487 assert(e);
3488
3489 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3490 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
5238e957 3491 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3492 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3493 * compromise. */
3494
3495 while ((d = e->inode_data_to_close)) {
3496 assert(d->fd >= 0);
3497 d->fd = safe_close(d->fd);
3498
3499 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3500 }
3501}
3502
3503_public_ int sd_event_prepare(sd_event *e) {
3504 int r;
fd38203a 3505
da7e457c 3506 assert_return(e, -EINVAL);
b937d761 3507 assert_return(e = event_resolve(e), -ENOPKG);
3508 assert_return(!event_pid_changed(e), -ECHILD);
3509 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3510 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
da7e457c 3511
3512 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3513 * this check here once, since gettid() is typically not cached, and thus want to minimize
3514 * syscalls */
3515 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3516
3517 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
3518 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3519
6203e07a 3520 if (e->exit_requested)
c45a5a74 3521 goto pending;
3522
3523 e->iteration++;
3524
0be6c2f6 3525 e->state = SD_EVENT_PREPARING;
fd38203a 3526 r = event_prepare(e);
0be6c2f6 3527 e->state = SD_EVENT_INITIAL;
fd38203a 3528 if (r < 0)
c45a5a74 3529 return r;
fd38203a 3530
3531 r = event_arm_timer(e, &e->realtime);
3532 if (r < 0)
c45a5a74 3533 return r;
6a0f1f6d 3534
3535 r = event_arm_timer(e, &e->boottime);
3536 if (r < 0)
c45a5a74 3537 return r;
a8548816 3538
3539 r = event_arm_timer(e, &e->monotonic);
3540 if (r < 0)
c45a5a74 3541 return r;
3542
3543 r = event_arm_timer(e, &e->realtime_alarm);
1b5995b0 3544 if (r < 0)
c45a5a74 3545 return r;
fd38203a 3546
6a0f1f6d 3547 r = event_arm_timer(e, &e->boottime_alarm);
1b5995b0 3548 if (r < 0)
c45a5a74 3549 return r;
fd38203a 3550
3551 event_close_inode_data_fds(e);
3552
1b5995b0 3553 if (event_next_pending(e) || e->need_process_child)
3554 goto pending;
3555
2b0c9ef7 3556 e->state = SD_EVENT_ARMED;
3557
3558 return 0;
3559
3560pending:
2b0c9ef7 3561 e->state = SD_EVENT_ARMED;
3562 r = sd_event_wait(e, 0);
3563 if (r == 0)
2b0c9ef7 3564 e->state = SD_EVENT_ARMED;
3565
3566 return r;
3567}
3568
3569_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
5cddd924 3570 size_t event_queue_max;
3571 int r, m, i;
3572
3573 assert_return(e, -EINVAL);
b937d761 3574 assert_return(e = event_resolve(e), -ENOPKG);
3575 assert_return(!event_pid_changed(e), -ECHILD);
3576 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3577 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3578
3579 if (e->exit_requested) {
3580 e->state = SD_EVENT_PENDING;
3581 return 1;
3582 }
6a0f1f6d 3583
3584 event_queue_max = MAX(e->n_sources, 1u);
3585 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3586 return -ENOMEM;
fd38203a 3587
3588 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3589 if (e->inotify_data_buffered)
3590 timeout = 0;
3591
5cddd924 3592 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
bab4820e 3593 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
da7e457c 3594 if (m < 0) {
3595 if (errno == EINTR) {
3596 e->state = SD_EVENT_PENDING;
3597 return 1;
3598 }
3599
3600 r = -errno;
3601 goto finish;
3602 }
fd38203a 3603
e475d10c 3604 triple_timestamp_get(&e->timestamp);
3605
3606 for (i = 0; i < m; i++) {
3607
3608 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3609 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
9da4cb2b 3610 else {
5cddd924 3611 WakeupType *t = e->event_queue[i].data.ptr;
3612
3613 switch (*t) {
3614
f8f3f926 3615 case WAKEUP_EVENT_SOURCE: {
5cddd924 3616 sd_event_source *s = e->event_queue[i].data.ptr;
3617
3618 assert(s);
3619
3620 switch (s->type) {
3621
3622 case SOURCE_IO:
5cddd924 3623 r = process_io(e, s, e->event_queue[i].events);
3624 break;
3625
3626 case SOURCE_CHILD:
5cddd924 3627 r = process_pidfd(e, s, e->event_queue[i].events);
3628 break;
3629
3630 default:
3631 assert_not_reached("Unexpected event source type");
3632 }
3633
9da4cb2b 3634 break;
f8f3f926 3635 }
fd38203a 3636
9da4cb2b 3637 case WAKEUP_CLOCK_DATA: {
5cddd924 3638 struct clock_data *d = e->event_queue[i].data.ptr;
3639
3640 assert(d);
3641
5cddd924 3642 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3643 break;
3644 }
3645
3646 case WAKEUP_SIGNAL_DATA:
5cddd924 3647 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3648 break;
3649
97ef5391 3650 case WAKEUP_INOTIFY_DATA:
5cddd924 3651 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3652 break;
3653
3654 default:
3655 assert_not_reached("Invalid wake-up pointer");
3656 }
3657 }
fd38203a 3658 if (r < 0)
da7e457c 3659 goto finish;
3660 }
3661
3662 r = process_watchdog(e);
3663 if (r < 0)
3664 goto finish;
3665
3666 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3667 if (r < 0)
3668 goto finish;
3669
e475d10c 3670 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3671 if (r < 0)
3672 goto finish;
3673
3674 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3675 if (r < 0)
3676 goto finish;
3677
3678 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
fd38203a 3679 if (r < 0)
da7e457c 3680 goto finish;
fd38203a 3681
e475d10c 3682 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
fd38203a 3683 if (r < 0)
da7e457c 3684 goto finish;
fd38203a 3685
c2ba3ad6 3686 if (e->need_process_child) {
3687 r = process_child(e);
3688 if (r < 0)
da7e457c 3689 goto finish;
3690 }
3691
3692 r = process_inotify(e);
3693 if (r < 0)
3694 goto finish;
3695
3696 if (event_next_pending(e)) {
3697 e->state = SD_EVENT_PENDING;
3698
3699 return 1;
3700 }
3701
c45a5a74 3702 r = 0;
fd38203a 3703
da7e457c 3704finish:
2b0c9ef7 3705 e->state = SD_EVENT_INITIAL;
3706
3707 return r;
3708}
3709
3710_public_ int sd_event_dispatch(sd_event *e) {
3711 sd_event_source *p;
3712 int r;
3713
3714 assert_return(e, -EINVAL);
b937d761 3715 assert_return(e = event_resolve(e), -ENOPKG);
3716 assert_return(!event_pid_changed(e), -ECHILD);
3717 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3718 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3719
3720 if (e->exit_requested)
3721 return dispatch_exit(e);
3722
3723 p = event_next_pending(e);
3724 if (p) {
f814c871 3725 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3726
3727 e->state = SD_EVENT_RUNNING;
3728 r = source_dispatch(p);
2b0c9ef7 3729 e->state = SD_EVENT_INITIAL;
3730 return r;
3731 }
3732
2b0c9ef7 3733 e->state = SD_EVENT_INITIAL;
3734
3735 return 1;
3736}
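
/* Illustrative sketch: sd_event_prepare(), sd_event_wait() and sd_event_dispatch() may also be
 * called individually instead of via sd_event_run(), e.g. when embedding this loop into a foreign
 * main loop through sd_event_get_fd(). One hand-rolled iteration (error handling abbreviated):
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)
 *             r = sd_event_wait(e, UINT64_MAX);
 *     if (r > 0)
 *             r = sd_event_dispatch(e);
 *
 * sd_event_prepare() returning > 0 means something is pending already and the wait can be skipped. */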
3737
34b87517 3738static void event_log_delays(sd_event *e) {
3739 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3740 size_t l, i;
34b87517 3741
3742 p = b;
3743 l = sizeof(b);
3744 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3745 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3746 e->delays[i] = 0;
3747 }
442ac269 3748 log_debug("Event loop iterations: %s", b);
3749}
3750
3751_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3752 int r;
3753
3754 assert_return(e, -EINVAL);
b937d761 3755 assert_return(e = event_resolve(e), -ENOPKG);
3756 assert_return(!event_pid_changed(e), -ECHILD);
3757 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2b0c9ef7 3758 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
c45a5a74 3759
e6a7bee5 3760 if (e->profile_delays && e->last_run_usec != 0) {
3761 usec_t this_run;
3762 unsigned l;
3763
3764 this_run = now(CLOCK_MONOTONIC);
3765
e6a7bee5 3766 l = u64log2(this_run - e->last_run_usec);
cb9d621e 3767 assert(l < ELEMENTSOF(e->delays));
3768 e->delays[l]++;
3769
e6a7bee5 3770 if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
34b87517 3771 event_log_delays(e);
e6a7bee5 3772 e->last_log_usec = this_run;
3773 }
3774 }
3775
3776 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
3777 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3778
c45a5a74 3779 r = sd_event_prepare(e);
53bac4e0
LP
3780 if (r == 0)
3781 /* There was nothing? Then wait... */
3782 r = sd_event_wait(e, timeout);
c45a5a74 3783
34b87517 3784 if (e->profile_delays)
e6a7bee5 3785 e->last_run_usec = now(CLOCK_MONOTONIC);
34b87517 3786
02d30981 3787 if (r > 0) {
53bac4e0 3788 /* There's something now, then let's dispatch it */
02d30981
TG
3789 r = sd_event_dispatch(e);
3790 if (r < 0)
3791 return r;
53bac4e0
LP
3792
3793 return 1;
3794 }
3795
3796 return r;
c45a5a74
TG
3797}
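
/* Illustrative sketch (not part of this file): driving the loop one iteration at a time with
 * sd_event_run(), waking up at least once per second for periodic work. do_housekeeping() is a
 * hypothetical callout:
 *
 *         while (sd_event_get_state(e) != SD_EVENT_FINISHED) {
 *                 r = sd_event_run(e, USEC_PER_SEC);
 *                 if (r < 0)
 *                         return r;
 *
 *                 // r > 0: an event was dispatched; r == 0: the timeout elapsed
 *                 do_housekeeping();
 *         }
 */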

_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* Pin the event loop while we run it, so that the callbacks cannot free it under our feet. */
        _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}
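
/* Illustrative sketch (not part of this file): a complete minimal program built around
 * sd_event_loop(). Note that sd_event_add_signal() requires the signal to be blocked first:
 *
 *         #include <signal.h>
 *         #include <systemd/sd-event.h>
 *
 *         static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *                 return sd_event_exit(sd_event_source_get_event(s), 0);
 *         }
 *
 *         int main(void) {
 *                 sd_event *e = NULL;
 *                 sigset_t ss;
 *
 *                 if (sd_event_default(&e) < 0)
 *                         return 1;
 *
 *                 sigemptyset(&ss);
 *                 sigaddset(&ss, SIGTERM);
 *                 sigprocmask(SIG_BLOCK, &ss, NULL);
 *
 *                 if (sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL) < 0)
 *                         return 1;
 *
 *                 int r = sd_event_loop(e);   // returns the code passed to sd_event_exit()
 *                 sd_event_unref(e);
 *                 return r < 0 ? 1 : 0;
 *         }
 */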

_public_ int sd_event_get_fd(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
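
/* Illustrative sketch (not part of this file): embedding this event loop into a foreign poll
 * loop by watching the epoll fd returned above and then running non-blocking iterations. Timer
 * sources would additionally need a poll timeout derived from sd_event_prepare()/sd_event_wait(),
 * omitted here for brevity:
 *
 *         #include <poll.h>
 *
 *         struct pollfd pfd = { .fd = sd_event_get_fd(e), .events = POLLIN };
 *
 *         for (;;) {
 *                 if (poll(&pfd, 1, -1) < 0)
 *                         break;
 *                 if (sd_event_run(e, 0) < 0)   // dispatch whatever is ready, without blocking
 *                         break;
 *         }
 */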

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use
         * clock_supported() here for a reason: there are systems where CLOCK_BOOTTIME is supported but
         * CLOCK_BOOTTIME_ALARM is not, and for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}
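
/* Illustrative sketch (not part of this file): the canonical use of sd_event_now() is computing
 * the absolute deadline of a relative timer from the cached timestamp of the current iteration
 * ("on_timer" is a hypothetical handler):
 *
 *         uint64_t t;
 *
 *         r = sd_event_now(e, CLOCK_MONOTONIC, &t);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                               t + 5 * USEC_PER_SEC,   // fire in five seconds
 *                               0,                      // default accuracy
 *                               on_timer, NULL);
 */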

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                struct epoll_event ev = {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
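
/* Illustrative sketch (not part of this file): a service running under the service manager with
 * WatchdogSec= set can delegate the periodic WATCHDOG=1 keep-alive pings entirely to the event
 * loop:
 *
 *         r = sd_event_set_watchdog(e, true);
 *         if (r < 0)
 *                 return r;
 *         if (r == 0)
 *                 log_debug("Watchdog logic not requested by service manager.");
 */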

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(ret, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}
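
/* Illustrative sketch (not part of this file): a destroy callback ties heap-allocated userdata to
 * the lifetime of its event source, so that it is freed exactly once no matter how the source goes
 * away ("on_io" and "c" are hypothetical):
 *
 *         r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, c);
 *         if (r < 0) {
 *                 free(c);
 *                 return r;
 *         }
 *
 *         (void) sd_event_source_set_destroy_callback(s, free);
 */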

_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}
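
/* Illustrative sketch (not part of this file): a floating event source is owned by the event loop
 * rather than by the caller, which is handy for fire-and-forget sources. Passing NULL as the return
 * parameter of the sd_event_add_*() calls creates a source that floats from the start ("on_defer"
 * is a hypothetical handler):
 *
 *         r = sd_event_add_defer(e, NULL, on_defer, NULL);
 *
 * Alternatively, ownership of an already existing source can be handed over to the loop:
 *
 *         r = sd_event_source_set_floating(s, true);
 */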

_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        return s->exit_on_failure;
}

_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);

        if (s->exit_on_failure == !!b)
                return 0;

        s->exit_on_failure = b;
        return 1;
}
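
/* Illustrative sketch (not part of this file): by default, a callback that returns an error only
 * gets its own source disabled. With exit-on-failure set, the error is propagated as the exit code
 * of the whole event loop instead ("on_io" is a hypothetical handler):
 *
 *         r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, NULL);
 *         if (r < 0)
 *                 return r;
 *
 *         (void) sd_event_source_set_exit_on_failure(s, true);
 */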