/* src/libsystemd/sd-event/sd-event.c */
1/* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3#include <sys/epoll.h>
4#include <sys/timerfd.h>
5#include <sys/wait.h>
6
7#include "sd-daemon.h"
8#include "sd-event.h"
9#include "sd-id128.h"
10
11#include "alloc-util.h"
12#include "env-util.h"
13#include "event-source.h"
14#include "fd-util.h"
15#include "fs-util.h"
16#include "hashmap.h"
17#include "list.h"
18#include "macro.h"
19#include "memory-util.h"
20#include "missing_syscall.h"
21#include "prioq.h"
22#include "process-util.h"
23#include "set.h"
24#include "signal-util.h"
25#include "string-table.h"
26#include "string-util.h"
27#include "strxcpyx.h"
28#include "time-util.h"
29
30#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38}
39
40static bool event_source_is_online(sd_event_source *s) {
41 assert(s);
42 return s->enabled != SD_EVENT_OFF && !s->ratelimited;
43}
44
45static bool event_source_is_offline(sd_event_source *s) {
46 assert(s);
47 return s->enabled == SD_EVENT_OFF || s->ratelimited;
48}
49
50static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
51 [SOURCE_IO] = "io",
52 [SOURCE_TIME_REALTIME] = "realtime",
53 [SOURCE_TIME_BOOTTIME] = "boottime",
54 [SOURCE_TIME_MONOTONIC] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
57 [SOURCE_SIGNAL] = "signal",
58 [SOURCE_CHILD] = "child",
59 [SOURCE_DEFER] = "defer",
60 [SOURCE_POST] = "post",
61 [SOURCE_EXIT] = "exit",
62 [SOURCE_WATCHDOG] = "watchdog",
63 [SOURCE_INOTIFY] = "inotify",
64};
65
66DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
67
68#define EVENT_SOURCE_IS_TIME(t) \
69 IN_SET((t), \
70 SOURCE_TIME_REALTIME, \
71 SOURCE_TIME_BOOTTIME, \
72 SOURCE_TIME_MONOTONIC, \
73 SOURCE_TIME_REALTIME_ALARM, \
74 SOURCE_TIME_BOOTTIME_ALARM)
75
76#define EVENT_SOURCE_CAN_RATE_LIMIT(t) \
77 IN_SET((t), \
78 SOURCE_IO, \
79 SOURCE_TIME_REALTIME, \
80 SOURCE_TIME_BOOTTIME, \
81 SOURCE_TIME_MONOTONIC, \
82 SOURCE_TIME_REALTIME_ALARM, \
83 SOURCE_TIME_BOOTTIME_ALARM, \
84 SOURCE_SIGNAL, \
85 SOURCE_DEFER, \
86 SOURCE_INOTIFY)
87
88/* This is used to assert that we didn't pass an unexpected source type to event_source_time_prioq_put().
89 * Time sources and ratelimited sources can be passed, so effectively this is the same as the
90 * EVENT_SOURCE_CAN_RATE_LIMIT() macro. */
91#define EVENT_SOURCE_USES_TIME_PRIOQ(t) EVENT_SOURCE_CAN_RATE_LIMIT(t)
92
93struct sd_event {
94 unsigned n_ref;
95
96 int epoll_fd;
97 int watchdog_fd;
98
99 Prioq *pending;
100 Prioq *prepare;
101
102 /* timerfd_create() only supports these five clocks so far. We
103 * can add support for more clocks when the kernel learns to
104 * deal with them, too. */
105 struct clock_data realtime;
106 struct clock_data boottime;
107 struct clock_data monotonic;
108 struct clock_data realtime_alarm;
109 struct clock_data boottime_alarm;
110
111 usec_t perturb;
112
113 sd_event_source **signal_sources; /* indexed by signal number */
114 Hashmap *signal_data; /* indexed by priority */
115
116 Hashmap *child_sources;
117 unsigned n_online_child_sources;
118
119 Set *post_sources;
120
121 Prioq *exit;
122
123 Hashmap *inotify_data; /* indexed by priority */
124
125 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
126 LIST_HEAD(struct inode_data, inode_data_to_close);
127
128 /* A list of inotify objects that already have events buffered which aren't processed yet */
129 LIST_HEAD(struct inotify_data, inotify_data_buffered);
130
131 pid_t original_pid;
132
133 uint64_t iteration;
134 triple_timestamp timestamp;
135 int state;
136
137 bool exit_requested:1;
138 bool need_process_child:1;
139 bool watchdog:1;
140 bool profile_delays:1;
141
142 int exit_code;
143
144 pid_t tid;
145 sd_event **default_event_ptr;
146
147 usec_t watchdog_last, watchdog_period;
148
149 unsigned n_sources;
150
151 struct epoll_event *event_queue;
152
153 LIST_HEAD(sd_event_source, sources);
154
155 usec_t last_run_usec, last_log_usec;
156 unsigned delays[sizeof(usec_t) * 8];
157};
158
159static thread_local sd_event *default_event = NULL;
160
161static void source_disconnect(sd_event_source *s);
162static void event_gc_inode_data(sd_event *e, struct inode_data *d);
163
164static sd_event *event_resolve(sd_event *e) {
165 return e == SD_EVENT_DEFAULT ? default_event : e;
166}
167
168static int pending_prioq_compare(const void *a, const void *b) {
169 const sd_event_source *x = a, *y = b;
170 int r;
171
172 assert(x->pending);
173 assert(y->pending);
174
175 /* Enabled ones first */
176 r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
177 if (r != 0)
178 return r;
179
180 /* Non rate-limited ones first. */
181 r = CMP(!!x->ratelimited, !!y->ratelimited);
182 if (r != 0)
183 return r;
184
185 /* Lower priority values first */
186 r = CMP(x->priority, y->priority);
187 if (r != 0)
188 return r;
189
190 /* Older entries first */
191 return CMP(x->pending_iteration, y->pending_iteration);
192}
193
194static int prepare_prioq_compare(const void *a, const void *b) {
195 const sd_event_source *x = a, *y = b;
196 int r;
197
198 assert(x->prepare);
199 assert(y->prepare);
200
201 /* Enabled ones first */
202 r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
203 if (r != 0)
204 return r;
205
206 /* Non rate-limited ones first. */
207 r = CMP(!!x->ratelimited, !!y->ratelimited);
208 if (r != 0)
209 return r;
210
211 /* Move most recently prepared ones last, so that we can stop
212 * preparing as soon as we hit one that has already been
213 * prepared in the current iteration */
214 r = CMP(x->prepare_iteration, y->prepare_iteration);
215 if (r != 0)
216 return r;
217
218 /* Lower priority values first */
219 return CMP(x->priority, y->priority);
220}
221
222static usec_t time_event_source_next(const sd_event_source *s) {
223 assert(s);
224
225 /* We have two kinds of event sources that have elapsation times associated with them: the actual
226 * time based ones and the ones for which a ratelimit can be in effect (where we want to be notified
227 * once the ratelimit time window ends). Let's return the next elapsing time depending on what we are
228 * looking at here. */
229
230 if (s->ratelimited) { /* If rate-limited the next elapsation is when the ratelimit time window ends */
231 assert(s->rate_limit.begin != 0);
232 assert(s->rate_limit.interval != 0);
233 return usec_add(s->rate_limit.begin, s->rate_limit.interval);
234 }
235
236 /* Otherwise this must be a time event source, if not ratelimited */
237 if (EVENT_SOURCE_IS_TIME(s->type))
238 return s->time.next;
239
240 return USEC_INFINITY;
241}
242
243static usec_t time_event_source_latest(const sd_event_source *s) {
244 assert(s);
245
246 if (s->ratelimited) { /* For ratelimited stuff the earliest and the latest time shall actually be the
247 * same, as we should avoid adding additional inaccuracy on top of an already
248 * inaccurate time window */
249 assert(s->rate_limit.begin != 0);
250 assert(s->rate_limit.interval != 0);
251 return usec_add(s->rate_limit.begin, s->rate_limit.interval);
252 }
253
254 /* Must be a time event source, if not ratelimited */
255 if (EVENT_SOURCE_IS_TIME(s->type))
256 return usec_add(s->time.next, s->time.accuracy);
257
258 return USEC_INFINITY;
259}
260
261static bool event_source_timer_candidate(const sd_event_source *s) {
262 assert(s);
263
264 /* Returns true for event sources that either are not pending yet (i.e. where it's worth marking them pending)
265 * or which are currently ratelimited (i.e. where it's worth leaving the ratelimited state) */
266 return !s->pending || s->ratelimited;
267}
268
269static int time_prioq_compare(const void *a, const void *b, usec_t (*time_func)(const sd_event_source *s)) {
270 const sd_event_source *x = a, *y = b;
271 int r;
272
273 /* Enabled ones first */
274 r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
275 if (r != 0)
276 return r;
277
278 /* Order "non-pending OR ratelimited" before "pending AND not-ratelimited" */
279 r = CMP(!event_source_timer_candidate(x), !event_source_timer_candidate(y));
280 if (r != 0)
281 return r;
282
283 /* Order by time */
284 return CMP(time_func(x), time_func(y));
285}
286
287static int earliest_time_prioq_compare(const void *a, const void *b) {
288 return time_prioq_compare(a, b, time_event_source_next);
289}
290
291static int latest_time_prioq_compare(const void *a, const void *b) {
292 return time_prioq_compare(a, b, time_event_source_latest);
293}
294
295static int exit_prioq_compare(const void *a, const void *b) {
296 const sd_event_source *x = a, *y = b;
297 int r;
298
299 assert(x->type == SOURCE_EXIT);
300 assert(y->type == SOURCE_EXIT);
301
302 /* Enabled ones first */
303 r = CMP(x->enabled == SD_EVENT_OFF, y->enabled == SD_EVENT_OFF);
304 if (r != 0)
305 return r;
306
307 /* Lower priority values first */
308 return CMP(x->priority, y->priority);
309}
310
311static void free_clock_data(struct clock_data *d) {
312 assert(d);
313 assert(d->wakeup == WAKEUP_CLOCK_DATA);
314
315 safe_close(d->fd);
316 prioq_free(d->earliest);
317 prioq_free(d->latest);
318}
319
320static sd_event *event_free(sd_event *e) {
321 sd_event_source *s;
322
323 assert(e);
324
325 while ((s = e->sources)) {
326 assert(s->floating);
327 source_disconnect(s);
328 sd_event_source_unref(s);
329 }
330
331 assert(e->n_sources == 0);
332
333 if (e->default_event_ptr)
334 *(e->default_event_ptr) = NULL;
335
336 safe_close(e->epoll_fd);
337 safe_close(e->watchdog_fd);
338
339 free_clock_data(&e->realtime);
340 free_clock_data(&e->boottime);
341 free_clock_data(&e->monotonic);
342 free_clock_data(&e->realtime_alarm);
343 free_clock_data(&e->boottime_alarm);
344
345 prioq_free(e->pending);
346 prioq_free(e->prepare);
347 prioq_free(e->exit);
348
349 free(e->signal_sources);
350 hashmap_free(e->signal_data);
351
352 hashmap_free(e->inotify_data);
353
354 hashmap_free(e->child_sources);
355 set_free(e->post_sources);
356
357 free(e->event_queue);
358
359 return mfree(e);
360}
361
362_public_ int sd_event_new(sd_event** ret) {
363 sd_event *e;
364 int r;
365
366 assert_return(ret, -EINVAL);
367
368 e = new(sd_event, 1);
369 if (!e)
370 return -ENOMEM;
371
372 *e = (sd_event) {
373 .n_ref = 1,
374 .epoll_fd = -1,
375 .watchdog_fd = -1,
376 .realtime.wakeup = WAKEUP_CLOCK_DATA,
377 .realtime.fd = -1,
378 .realtime.next = USEC_INFINITY,
379 .boottime.wakeup = WAKEUP_CLOCK_DATA,
380 .boottime.fd = -1,
381 .boottime.next = USEC_INFINITY,
382 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
383 .monotonic.fd = -1,
384 .monotonic.next = USEC_INFINITY,
385 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
386 .realtime_alarm.fd = -1,
387 .realtime_alarm.next = USEC_INFINITY,
388 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
389 .boottime_alarm.fd = -1,
390 .boottime_alarm.next = USEC_INFINITY,
391 .perturb = USEC_INFINITY,
392 .original_pid = getpid_cached(),
393 };
394
395 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
396 if (r < 0)
397 goto fail;
398
399 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
400 if (e->epoll_fd < 0) {
401 r = -errno;
402 goto fail;
403 }
404
405 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
406
407 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
408 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 … 2^63 us will be logged every 5s.");
409 e->profile_delays = true;
410 }
411
412 *ret = e;
413 return 0;
414
415fail:
416 event_free(e);
417 return r;
418}
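/* Example (sketch): typical lifecycle around sd_event_new(). sd_event_loop() and
 * sd_event_unref() are assumed from the public sd-event API; error handling is elided.
 *
 *     #include <systemd/sd-event.h>
 *
 *     int run_loop(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             // ... attach IO/timer/signal/child sources here ...
 *
 *             r = sd_event_loop(e);       // dispatches until sd_event_exit() is called
 *             sd_event_unref(e);
 *             return r;
 *     }
 */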
419
420DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
421
422_public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
423 if (s)
424 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
425 return sd_event_source_unref(s);
426}
427
428static bool event_pid_changed(sd_event *e) {
429 assert(e);
430
431 /* We don't support people creating an event loop and keeping
432 * it around over a fork(). Let's complain. */
433
434 return e->original_pid != getpid_cached();
435}
436
437static void source_io_unregister(sd_event_source *s) {
438 assert(s);
439 assert(s->type == SOURCE_IO);
440
441 if (event_pid_changed(s->event))
442 return;
443
444 if (!s->io.registered)
445 return;
446
447 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
448 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
449 strna(s->description), event_source_type_to_string(s->type));
450
451 s->io.registered = false;
452}
453
454static int source_io_register(
455 sd_event_source *s,
456 int enabled,
457 uint32_t events) {
458
459 assert(s);
460 assert(s->type == SOURCE_IO);
461 assert(enabled != SD_EVENT_OFF);
462
463 struct epoll_event ev = {
464 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
465 .data.ptr = s,
466 };
467
468 if (epoll_ctl(s->event->epoll_fd,
469 s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
470 s->io.fd, &ev) < 0)
471 return -errno;
472
473 s->io.registered = true;
474
475 return 0;
476}
477
478static void source_child_pidfd_unregister(sd_event_source *s) {
479 assert(s);
480 assert(s->type == SOURCE_CHILD);
481
482 if (event_pid_changed(s->event))
483 return;
484
485 if (!s->child.registered)
486 return;
487
488 if (EVENT_SOURCE_WATCH_PIDFD(s))
489 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
490 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
491 strna(s->description), event_source_type_to_string(s->type));
492
493 s->child.registered = false;
494}
495
496static int source_child_pidfd_register(sd_event_source *s, int enabled) {
497 assert(s);
498 assert(s->type == SOURCE_CHILD);
499 assert(enabled != SD_EVENT_OFF);
500
501 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
502 struct epoll_event ev = {
503 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
504 .data.ptr = s,
505 };
506
507 if (epoll_ctl(s->event->epoll_fd,
508 s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
509 s->child.pidfd, &ev) < 0)
510 return -errno;
511 }
512
513 s->child.registered = true;
514 return 0;
515}
516
517static clockid_t event_source_type_to_clock(EventSourceType t) {
518
519 switch (t) {
520
521 case SOURCE_TIME_REALTIME:
522 return CLOCK_REALTIME;
523
524 case SOURCE_TIME_BOOTTIME:
525 return CLOCK_BOOTTIME;
526
527 case SOURCE_TIME_MONOTONIC:
528 return CLOCK_MONOTONIC;
529
530 case SOURCE_TIME_REALTIME_ALARM:
531 return CLOCK_REALTIME_ALARM;
532
533 case SOURCE_TIME_BOOTTIME_ALARM:
534 return CLOCK_BOOTTIME_ALARM;
535
536 default:
537 return (clockid_t) -1;
538 }
539}
540
541static EventSourceType clock_to_event_source_type(clockid_t clock) {
542
543 switch (clock) {
544
545 case CLOCK_REALTIME:
546 return SOURCE_TIME_REALTIME;
547
548 case CLOCK_BOOTTIME:
549 return SOURCE_TIME_BOOTTIME;
550
551 case CLOCK_MONOTONIC:
552 return SOURCE_TIME_MONOTONIC;
553
554 case CLOCK_REALTIME_ALARM:
555 return SOURCE_TIME_REALTIME_ALARM;
556
557 case CLOCK_BOOTTIME_ALARM:
558 return SOURCE_TIME_BOOTTIME_ALARM;
559
560 default:
561 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
562 }
563}
564
565static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
566 assert(e);
567
568 switch (t) {
569
570 case SOURCE_TIME_REALTIME:
571 return &e->realtime;
572
573 case SOURCE_TIME_BOOTTIME:
574 return &e->boottime;
575
576 case SOURCE_TIME_MONOTONIC:
577 return &e->monotonic;
578
579 case SOURCE_TIME_REALTIME_ALARM:
580 return &e->realtime_alarm;
581
582 case SOURCE_TIME_BOOTTIME_ALARM:
583 return &e->boottime_alarm;
584
585 default:
586 return NULL;
587 }
588}
589
590static void event_free_signal_data(sd_event *e, struct signal_data *d) {
591 assert(e);
592
593 if (!d)
594 return;
595
596 hashmap_remove(e->signal_data, &d->priority);
597 safe_close(d->fd);
598 free(d);
599}
600
601static int event_make_signal_data(
602 sd_event *e,
603 int sig,
604 struct signal_data **ret) {
605
606 struct signal_data *d;
607 bool added = false;
608 sigset_t ss_copy;
609 int64_t priority;
610 int r;
611
612 assert(e);
613
614 if (event_pid_changed(e))
615 return -ECHILD;
616
617 if (e->signal_sources && e->signal_sources[sig])
618 priority = e->signal_sources[sig]->priority;
619 else
620 priority = SD_EVENT_PRIORITY_NORMAL;
621
622 d = hashmap_get(e->signal_data, &priority);
623 if (d) {
624 if (sigismember(&d->sigset, sig) > 0) {
625 if (ret)
626 *ret = d;
627 return 0;
628 }
629 } else {
630 d = new(struct signal_data, 1);
631 if (!d)
632 return -ENOMEM;
633
634 *d = (struct signal_data) {
635 .wakeup = WAKEUP_SIGNAL_DATA,
636 .fd = -1,
637 .priority = priority,
638 };
639
640 r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
641 if (r < 0) {
642 free(d);
643 return r;
644 }
645
646 added = true;
647 }
648
649 ss_copy = d->sigset;
650 assert_se(sigaddset(&ss_copy, sig) >= 0);
651
652 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
653 if (r < 0) {
654 r = -errno;
655 goto fail;
656 }
657
658 d->sigset = ss_copy;
659
660 if (d->fd >= 0) {
661 if (ret)
662 *ret = d;
663 return 0;
664 }
665
666 d->fd = fd_move_above_stdio(r);
667
668 struct epoll_event ev = {
669 .events = EPOLLIN,
670 .data.ptr = d,
671 };
672
673 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
674 r = -errno;
675 goto fail;
676 }
677
678 if (ret)
679 *ret = d;
680
681 return 0;
682
683fail:
684 if (added)
685 event_free_signal_data(e, d);
686
687 return r;
688}
689
690static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
691 assert(e);
692 assert(d);
693
694 /* Turns off the specified signal in the signal data
695 * object. If the signal mask of the object becomes empty that
696 * way, the object is removed. */
697
698 if (sigismember(&d->sigset, sig) == 0)
699 return;
700
701 assert_se(sigdelset(&d->sigset, sig) >= 0);
702
703 if (sigisemptyset(&d->sigset)) {
704 /* If the mask is all-zero we can get rid of the structure */
705 event_free_signal_data(e, d);
706 return;
707 }
708
709 assert(d->fd >= 0);
710
711 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
712 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
713}
714
715static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
716 struct signal_data *d;
717 static const int64_t zero_priority = 0;
718
719 assert(e);
720
721 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
722 * and possibly drop the signalfd for it. */
723
724 if (sig == SIGCHLD &&
725 e->n_online_child_sources > 0)
726 return;
727
728 if (e->signal_sources &&
729 e->signal_sources[sig] &&
730 event_source_is_online(e->signal_sources[sig]))
731 return;
732
733 /*
734 * The specified signal might be enabled in three different queues:
735 *
736 * 1) the one that belongs to the priority passed (if it is non-NULL)
737 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
738 * 3) the 0 priority (to cover the SIGCHLD case)
739 *
740 * Hence, let's remove it from all three here.
741 */
742
743 if (priority) {
744 d = hashmap_get(e->signal_data, priority);
745 if (d)
746 event_unmask_signal_data(e, d, sig);
747 }
748
749 if (e->signal_sources && e->signal_sources[sig]) {
750 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
751 if (d)
752 event_unmask_signal_data(e, d, sig);
753 }
754
755 d = hashmap_get(e->signal_data, &zero_priority);
756 if (d)
757 event_unmask_signal_data(e, d, sig);
758}
759
760static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
761 assert(s);
762
763 /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
764 * they are enabled/disabled or marked pending and such. */
765
766 if (s->pending)
767 prioq_reshuffle(s->event->pending, s, &s->pending_index);
768
769 if (s->prepare)
770 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
771}
772
773static void event_source_time_prioq_reshuffle(sd_event_source *s) {
774 struct clock_data *d;
775
776 assert(s);
777
778 /* Called whenever the event source's timer ordering properties changed, i.e. time, accuracy,
779 * pending, enable state, and ratelimiting state. Makes sure the two prioq's are ordered
780 * properly again. */
781
782 if (s->ratelimited)
783 d = &s->event->monotonic;
784 else if (EVENT_SOURCE_IS_TIME(s->type))
785 assert_se(d = event_get_clock_data(s->event, s->type));
786 else
787 return; /* no-op for an event source which is neither a timer nor ratelimited. */
788
789 prioq_reshuffle(d->earliest, s, &s->earliest_index);
790 prioq_reshuffle(d->latest, s, &s->latest_index);
791 d->needs_rearm = true;
792}
793
794static void event_source_time_prioq_remove(
795 sd_event_source *s,
796 struct clock_data *d) {
797
798 assert(s);
799 assert(d);
800
801 prioq_remove(d->earliest, s, &s->earliest_index);
802 prioq_remove(d->latest, s, &s->latest_index);
803 s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
804 d->needs_rearm = true;
805}
806
807static void source_disconnect(sd_event_source *s) {
808 sd_event *event;
809
810 assert(s);
811
812 if (!s->event)
813 return;
814
815 assert(s->event->n_sources > 0);
816
817 switch (s->type) {
818
819 case SOURCE_IO:
820 if (s->io.fd >= 0)
821 source_io_unregister(s);
822
823 break;
824
825 case SOURCE_TIME_REALTIME:
826 case SOURCE_TIME_BOOTTIME:
827 case SOURCE_TIME_MONOTONIC:
828 case SOURCE_TIME_REALTIME_ALARM:
829 case SOURCE_TIME_BOOTTIME_ALARM:
830 /* Only remove this event source from the time event source here if it is not ratelimited. If
831 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
832 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock */
833
834 if (!s->ratelimited) {
835 struct clock_data *d;
836 assert_se(d = event_get_clock_data(s->event, s->type));
837 event_source_time_prioq_remove(s, d);
838 }
839
840 break;
841
842 case SOURCE_SIGNAL:
843 if (s->signal.sig > 0) {
844
845 if (s->event->signal_sources)
846 s->event->signal_sources[s->signal.sig] = NULL;
847
848 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
849 }
850
851 break;
852
853 case SOURCE_CHILD:
854 if (s->child.pid > 0) {
855 if (event_source_is_online(s)) {
856 assert(s->event->n_online_child_sources > 0);
857 s->event->n_online_child_sources--;
858 }
859
860 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
861 }
862
863 if (EVENT_SOURCE_WATCH_PIDFD(s))
864 source_child_pidfd_unregister(s);
865 else
866 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
867
868 break;
869
870 case SOURCE_DEFER:
871 /* nothing */
872 break;
873
874 case SOURCE_POST:
875 set_remove(s->event->post_sources, s);
876 break;
877
878 case SOURCE_EXIT:
879 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
880 break;
881
882 case SOURCE_INOTIFY: {
883 struct inode_data *inode_data;
884
885 inode_data = s->inotify.inode_data;
886 if (inode_data) {
887 struct inotify_data *inotify_data;
888 assert_se(inotify_data = inode_data->inotify_data);
889
890 /* Detach this event source from the inode object */
891 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
892 s->inotify.inode_data = NULL;
893
894 if (s->pending) {
895 assert(inotify_data->n_pending > 0);
896 inotify_data->n_pending--;
897 }
898
899 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
900 * continues to be watched. That's because inotify doesn't really have an API for that: we
901 * can only change watch masks with access to the original inode either by fd or by path. But
902 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
903 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
904 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
905 * there), but given the need for open_by_handle_at() which is privileged and not universally
906 * available this would be quite an incomplete solution. Hence we go the other way, leave the
907 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
908 * anymore after reception. Yes, this sucks, but … Linux … */
909
910 /* Maybe release the inode data (and its inotify) */
911 event_gc_inode_data(s->event, inode_data);
912 }
913
914 break;
915 }
916
917 default:
918 assert_not_reached();
919 }
920
921 if (s->pending)
922 prioq_remove(s->event->pending, s, &s->pending_index);
923
924 if (s->prepare)
925 prioq_remove(s->event->prepare, s, &s->prepare_index);
926
927 if (s->ratelimited)
928 event_source_time_prioq_remove(s, &s->event->monotonic);
929
930 event = TAKE_PTR(s->event);
931 LIST_REMOVE(sources, event->sources, s);
932 event->n_sources--;
933
934 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
935 * pidfd associated with this event source, which we'll do only on source_free(). */
936
937 if (!s->floating)
938 sd_event_unref(event);
939}
940
941static sd_event_source* source_free(sd_event_source *s) {
942 assert(s);
943
944 source_disconnect(s);
945
946 if (s->type == SOURCE_IO && s->io.owned)
947 s->io.fd = safe_close(s->io.fd);
948
949 if (s->type == SOURCE_CHILD) {
950 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
951
952 if (s->child.process_owned) {
953
954 if (!s->child.exited) {
955 bool sent = false;
956
957 if (s->child.pidfd >= 0) {
958 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
959 if (errno == ESRCH) /* Already dead */
960 sent = true;
961 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
962 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
963 s->child.pid);
964 } else
965 sent = true;
966 }
967
968 if (!sent)
969 if (kill(s->child.pid, SIGKILL) < 0)
970 if (errno != ESRCH) /* Already dead */
971 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
972 s->child.pid);
973 }
974
975 if (!s->child.waited) {
976 siginfo_t si = {};
977
978 /* Reap the child if we can */
979 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
980 }
981 }
982
983 if (s->child.pidfd_owned)
984 s->child.pidfd = safe_close(s->child.pidfd);
985 }
986
987 if (s->destroy_callback)
988 s->destroy_callback(s->userdata);
989
990 free(s->description);
991 return mfree(s);
992}
993DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
994
995static int source_set_pending(sd_event_source *s, bool b) {
996 int r;
997
998 assert(s);
999 assert(s->type != SOURCE_EXIT);
1000
1001 if (s->pending == b)
1002 return 0;
1003
1004 s->pending = b;
1005
1006 if (b) {
1007 s->pending_iteration = s->event->iteration;
1008
1009 r = prioq_put(s->event->pending, s, &s->pending_index);
1010 if (r < 0) {
1011 s->pending = false;
1012 return r;
1013 }
1014 } else
1015 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1016
1017 if (EVENT_SOURCE_IS_TIME(s->type))
1018 event_source_time_prioq_reshuffle(s);
1019
1020 if (s->type == SOURCE_SIGNAL && !b) {
1021 struct signal_data *d;
1022
1023 d = hashmap_get(s->event->signal_data, &s->priority);
1024 if (d && d->current == s)
1025 d->current = NULL;
1026 }
1027
1028 if (s->type == SOURCE_INOTIFY) {
1029
1030 assert(s->inotify.inode_data);
1031 assert(s->inotify.inode_data->inotify_data);
1032
1033 if (b)
1034 s->inotify.inode_data->inotify_data->n_pending ++;
1035 else {
1036 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1037 s->inotify.inode_data->inotify_data->n_pending --;
1038 }
1039 }
1040
1041 return 1;
1042}
1043
1044static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1045 sd_event_source *s;
1046
1047 assert(e);
1048
1049 s = new(sd_event_source, 1);
1050 if (!s)
1051 return NULL;
1052
1053 *s = (struct sd_event_source) {
1054 .n_ref = 1,
1055 .event = e,
1056 .floating = floating,
1057 .type = type,
1058 .pending_index = PRIOQ_IDX_NULL,
1059 .prepare_index = PRIOQ_IDX_NULL,
1060 };
1061
1062 if (!floating)
1063 sd_event_ref(e);
1064
1065 LIST_PREPEND(sources, e->sources, s);
1066 e->n_sources++;
1067
1068 return s;
1069}
1070
1071static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1072 assert(s);
1073
1074 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1075}
1076
1077_public_ int sd_event_add_io(
1078 sd_event *e,
1079 sd_event_source **ret,
1080 int fd,
1081 uint32_t events,
1082 sd_event_io_handler_t callback,
1083 void *userdata) {
1084
1085 _cleanup_(source_freep) sd_event_source *s = NULL;
1086 int r;
1087
1088 assert_return(e, -EINVAL);
1089 assert_return(e = event_resolve(e), -ENOPKG);
1090 assert_return(fd >= 0, -EBADF);
1091 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1092 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1093 assert_return(!event_pid_changed(e), -ECHILD);
1094
1095 if (!callback)
1096 callback = io_exit_callback;
1097
1098 s = source_new(e, !ret, SOURCE_IO);
1099 if (!s)
1100 return -ENOMEM;
1101
1102 s->wakeup = WAKEUP_EVENT_SOURCE;
1103 s->io.fd = fd;
1104 s->io.events = events;
1105 s->io.callback = callback;
1106 s->userdata = userdata;
1107 s->enabled = SD_EVENT_ON;
1108
1109 r = source_io_register(s, s->enabled, events);
1110 if (r < 0)
1111 return r;
1112
1113 if (ret)
1114 *ret = s;
1115 TAKE_PTR(s);
1116
1117 return 0;
1118}
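/* Example (sketch): watch an already-open, non-blocking socket for input. "sock_fd" and
 * "io_source" are hypothetical caller-owned variables; the callback signature matches
 * sd_event_io_handler_t as used above.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             if (n <= 0)
 *                     return sd_event_exit(sd_event_source_get_event(s), 0);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &io_source, sock_fd, EPOLLIN, on_io, NULL);
 */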
1119
1120static void initialize_perturb(sd_event *e) {
1121 sd_id128_t bootid = {};
1122
1123 /* When we sleep for longer, we try to realign the wakeup to
1124 the same time within each minute/second/250ms, so that
1125 events all across the system can be coalesced into a single
1126 CPU wakeup. However, let's take some system-specific
1127 randomness for this value, so that in a network of systems
1128 with synced clocks timer events are distributed a
1129 bit. Here, we calculate a perturbation usec offset from the
1130 boot ID. */
1131
1132 if (_likely_(e->perturb != USEC_INFINITY))
1133 return;
1134
1135 if (sd_id128_get_boot(&bootid) >= 0)
1136 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1137}
1138
1139static int event_setup_timer_fd(
1140 sd_event *e,
1141 struct clock_data *d,
1142 clockid_t clock) {
1143
1144 assert(e);
1145 assert(d);
1146
1147 if (_likely_(d->fd >= 0))
1148 return 0;
1149
1150 _cleanup_close_ int fd = -1;
1151
1152 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1153 if (fd < 0)
1154 return -errno;
1155
1156 fd = fd_move_above_stdio(fd);
1157
1158 struct epoll_event ev = {
1159 .events = EPOLLIN,
1160 .data.ptr = d,
1161 };
1162
1163 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
1164 return -errno;
1165
1166 d->fd = TAKE_FD(fd);
1167 return 0;
1168}
1169
1170static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1171 assert(s);
1172
1173 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1174}
1175
1176static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
1177 int r;
1178
1179 assert(d);
1180
1181 if (d->fd < 0) {
1182 r = event_setup_timer_fd(e, d, clock);
1183 if (r < 0)
1184 return r;
1185 }
1186
1187 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1188 if (r < 0)
1189 return r;
1190
1191 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1192 if (r < 0)
1193 return r;
1194
1195 return 0;
1196}
1197
1198static int event_source_time_prioq_put(
1199 sd_event_source *s,
1200 struct clock_data *d) {
1201
1202 int r;
1203
1204 assert(s);
1205 assert(d);
1206 assert(EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
1207
1208 r = prioq_put(d->earliest, s, &s->earliest_index);
1209 if (r < 0)
1210 return r;
1211
1212 r = prioq_put(d->latest, s, &s->latest_index);
1213 if (r < 0) {
1214 assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
1215 s->earliest_index = PRIOQ_IDX_NULL;
1216 return r;
1217 }
1218
1219 d->needs_rearm = true;
1220 return 0;
1221}
1222
1223_public_ int sd_event_add_time(
1224 sd_event *e,
1225 sd_event_source **ret,
1226 clockid_t clock,
1227 uint64_t usec,
1228 uint64_t accuracy,
1229 sd_event_time_handler_t callback,
1230 void *userdata) {
1231
1232 EventSourceType type;
1233 _cleanup_(source_freep) sd_event_source *s = NULL;
1234 struct clock_data *d;
1235 int r;
1236
1237 assert_return(e, -EINVAL);
1238 assert_return(e = event_resolve(e), -ENOPKG);
1239 assert_return(accuracy != UINT64_MAX, -EINVAL);
1240 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1241 assert_return(!event_pid_changed(e), -ECHILD);
1242
1243 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1244 return -EOPNOTSUPP;
1245
1246 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1247 if (type < 0)
1248 return -EOPNOTSUPP;
1249
1250 if (!callback)
1251 callback = time_exit_callback;
1252
1253 assert_se(d = event_get_clock_data(e, type));
1254
1255 r = setup_clock_data(e, d, clock);
1256 if (r < 0)
1257 return r;
1258
1259 s = source_new(e, !ret, type);
1260 if (!s)
1261 return -ENOMEM;
1262
1263 s->time.next = usec;
1264 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1265 s->time.callback = callback;
1266 s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
1267 s->userdata = userdata;
1268 s->enabled = SD_EVENT_ONESHOT;
1269
1270 r = event_source_time_prioq_put(s, d);
1271 if (r < 0)
1272 return r;
1273
1274 if (ret)
1275 *ret = s;
1276 TAKE_PTR(s);
1277
1278 return 0;
1279}
1280
1281_public_ int sd_event_add_time_relative(
1282 sd_event *e,
1283 sd_event_source **ret,
1284 clockid_t clock,
1285 uint64_t usec,
1286 uint64_t accuracy,
1287 sd_event_time_handler_t callback,
1288 void *userdata) {
1289
1290 usec_t t;
1291 int r;
1292
1293 /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
1294 * checks for overflow. */
1295
1296 r = sd_event_now(e, clock, &t);
1297 if (r < 0)
1298 return r;
1299
1300 if (usec >= USEC_INFINITY - t)
1301 return -EOVERFLOW;
1302
1303 return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
1304}
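/* Example (sketch): arm a one-shot timer five seconds from now on CLOCK_MONOTONIC. Passing
 * accuracy == 0 selects DEFAULT_ACCURACY_USEC, as handled in sd_event_add_time() above.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             log_debug("timer elapsed");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_time_relative(e, &timer_source, CLOCK_MONOTONIC,
 *                                    5 * USEC_PER_SEC, 0, on_timer, NULL);
 */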
1305
1306static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1307 assert(s);
1308
1309 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1310}
1311
1312_public_ int sd_event_add_signal(
1313 sd_event *e,
1314 sd_event_source **ret,
1315 int sig,
1316 sd_event_signal_handler_t callback,
1317 void *userdata) {
1318
1319 _cleanup_(source_freep) sd_event_source *s = NULL;
1320 struct signal_data *d;
1321 int r;
1322
1323 assert_return(e, -EINVAL);
1324 assert_return(e = event_resolve(e), -ENOPKG);
1325 assert_return(SIGNAL_VALID(sig), -EINVAL);
1326 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1327 assert_return(!event_pid_changed(e), -ECHILD);
1328
1329 if (!callback)
1330 callback = signal_exit_callback;
1331
1332 r = signal_is_blocked(sig);
1333 if (r < 0)
1334 return r;
1335 if (r == 0)
1336 return -EBUSY;
1337
1338 if (!e->signal_sources) {
1339 e->signal_sources = new0(sd_event_source*, _NSIG);
1340 if (!e->signal_sources)
1341 return -ENOMEM;
1342 } else if (e->signal_sources[sig])
1343 return -EBUSY;
1344
1345 s = source_new(e, !ret, SOURCE_SIGNAL);
1346 if (!s)
1347 return -ENOMEM;
1348
1349 s->signal.sig = sig;
1350 s->signal.callback = callback;
1351 s->userdata = userdata;
1352 s->enabled = SD_EVENT_ON;
1353
1354 e->signal_sources[sig] = s;
1355
1356 r = event_make_signal_data(e, sig, &d);
1357 if (r < 0)
1358 return r;
1359
1360 /* Use the signal name as description for the event source by default */
1361 (void) sd_event_source_set_description(s, signal_to_string(sig));
1362
1363 if (ret)
1364 *ret = s;
1365 TAKE_PTR(s);
1366
1367 return 0;
1368}
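/* Example (sketch): the code above insists the signal is already blocked (otherwise
 * -EBUSY), so block SIGTERM first; a NULL callback falls back to signal_exit_callback(),
 * which terminates the loop.
 *
 *     sigset_t mask;
 *     assert_se(sigemptyset(&mask) >= 0);
 *     assert_se(sigaddset(&mask, SIGTERM) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);   // floating event source
 */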
1369
1370static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
1371 assert(s);
1372
1373 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1374}
1375
1376static bool shall_use_pidfd(void) {
1377 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1378 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1379}
1380
1381_public_ int sd_event_add_child(
1382 sd_event *e,
1383 sd_event_source **ret,
1384 pid_t pid,
1385 int options,
1386 sd_event_child_handler_t callback,
1387 void *userdata) {
1388
1389 _cleanup_(source_freep) sd_event_source *s = NULL;
1390 int r;
1391
1392 assert_return(e, -EINVAL);
1393 assert_return(e = event_resolve(e), -ENOPKG);
1394 assert_return(pid > 1, -EINVAL);
1395 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1396 assert_return(options != 0, -EINVAL);
1397 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1398 assert_return(!event_pid_changed(e), -ECHILD);
1399
1400 if (!callback)
1401 callback = child_exit_callback;
1402
1403 if (e->n_online_child_sources == 0) {
1404 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1405 * for compatibility with pre-pidfd and because we don't want to reap the child processes
1406 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1407 * take effect.
1408 *
1409 * (As an optimization we only do this check on the first child event source created.) */
1410 r = signal_is_blocked(SIGCHLD);
1411 if (r < 0)
1412 return r;
1413 if (r == 0)
1414 return -EBUSY;
1415 }
1416
1417 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1418 if (r < 0)
1419 return r;
1420
1421 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1422 return -EBUSY;
1423
1424 s = source_new(e, !ret, SOURCE_CHILD);
1425 if (!s)
1426 return -ENOMEM;
1427
1428 s->wakeup = WAKEUP_EVENT_SOURCE;
1429 s->child.pid = pid;
1430 s->child.options = options;
1431 s->child.callback = callback;
1432 s->userdata = userdata;
1433 s->enabled = SD_EVENT_ONESHOT;
1434
1435 /* We always take a pidfd here if we can, even if we wait for something other than WEXITED, so that we
1436 * pin the PID, and make regular waitid() handling race-free. */
1437
1438 if (shall_use_pidfd()) {
1439 s->child.pidfd = pidfd_open(s->child.pid, 0);
1440 if (s->child.pidfd < 0) {
1441 /* Propagate errors unless the syscall is not supported or blocked */
1442 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1443 return -errno;
1444 } else
1445 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1446 } else
1447 s->child.pidfd = -1;
1448
1449 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1450 if (r < 0)
1451 return r;
1452
1453 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1454 /* We have a pidfd and we only want to watch for exit */
1455 r = source_child_pidfd_register(s, s->enabled);
1456 if (r < 0)
1457 return r;
1458
1459 } else {
1460 /* We have no pidfd or we shall wait for some other event than WEXITED */
1461 r = event_make_signal_data(e, SIGCHLD, NULL);
1462 if (r < 0)
1463 return r;
1464
1465 e->need_process_child = true;
1466 }
1467
1468 e->n_online_child_sources++;
1469
1470 if (ret)
1471 *ret = s;
1472 TAKE_PTR(s);
1473 return 0;
1474}
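/* Example (sketch): watch a forked child for exit; SIGCHLD must already be blocked (see the
 * check above). "pid" is a hypothetical child forked by the caller.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("child " PID_FMT " exited with status %i", si->si_pid, si->si_status);
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     r = sd_event_add_child(e, &child_source, pid, WEXITED, on_child, NULL);
 */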
1475
1476_public_ int sd_event_add_child_pidfd(
1477 sd_event *e,
1478 sd_event_source **ret,
1479 int pidfd,
1480 int options,
1481 sd_event_child_handler_t callback,
1482 void *userdata) {
1483
1484
1485 _cleanup_(source_freep) sd_event_source *s = NULL;
1486 pid_t pid;
1487 int r;
1488
1489 assert_return(e, -EINVAL);
1490 assert_return(e = event_resolve(e), -ENOPKG);
1491 assert_return(pidfd >= 0, -EBADF);
1492 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1493 assert_return(options != 0, -EINVAL);
1494 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1495 assert_return(!event_pid_changed(e), -ECHILD);
1496
1497 if (!callback)
1498 callback = child_exit_callback;
1499
1500 if (e->n_online_child_sources == 0) {
1501 r = signal_is_blocked(SIGCHLD);
1502 if (r < 0)
1503 return r;
1504 if (r == 0)
1505 return -EBUSY;
1506 }
1507
1508 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1509 if (r < 0)
1510 return r;
1511
1512 r = pidfd_get_pid(pidfd, &pid);
1513 if (r < 0)
1514 return r;
1515
1516 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1517 return -EBUSY;
1518
1519 s = source_new(e, !ret, SOURCE_CHILD);
1520 if (!s)
1521 return -ENOMEM;
1522
1523 s->wakeup = WAKEUP_EVENT_SOURCE;
1524 s->child.pidfd = pidfd;
1525 s->child.pid = pid;
1526 s->child.options = options;
1527 s->child.callback = callback;
1528 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1529 s->userdata = userdata;
1530 s->enabled = SD_EVENT_ONESHOT;
1531
1532 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1533 if (r < 0)
1534 return r;
1535
1536 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1537 /* We only want to watch for WEXITED */
1538 r = source_child_pidfd_register(s, s->enabled);
1539 if (r < 0)
1540 return r;
1541 } else {
1542 /* We shall wait for some other event than WEXITED */
1543 r = event_make_signal_data(e, SIGCHLD, NULL);
1544 if (r < 0)
1545 return r;
1546
1547 e->need_process_child = true;
1548 }
1549
1550 e->n_online_child_sources++;
1551
1552 if (ret)
1553 *ret = s;
1554 TAKE_PTR(s);
1555 return 0;
1556}
1557
1558static int generic_exit_callback(sd_event_source *s, void *userdata) {
1559 assert(s);
1560
1561 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1562}
1563
1564_public_ int sd_event_add_defer(
1565 sd_event *e,
1566 sd_event_source **ret,
1567 sd_event_handler_t callback,
1568 void *userdata) {
1569
1570 _cleanup_(source_freep) sd_event_source *s = NULL;
1571 int r;
1572
1573 assert_return(e, -EINVAL);
1574 assert_return(e = event_resolve(e), -ENOPKG);
1575 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1576 assert_return(!event_pid_changed(e), -ECHILD);
1577
1578 if (!callback)
1579 callback = generic_exit_callback;
1580
1581 s = source_new(e, !ret, SOURCE_DEFER);
1582 if (!s)
1583 return -ENOMEM;
1584
1585 s->defer.callback = callback;
1586 s->userdata = userdata;
1587 s->enabled = SD_EVENT_ONESHOT;
1588
1589 r = source_set_pending(s, true);
1590 if (r < 0)
1591 return r;
1592
1593 if (ret)
1594 *ret = s;
1595 TAKE_PTR(s);
1596
1597 return 0;
1598}
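/* Example (sketch): a defer source is created as SD_EVENT_ONESHOT and immediately marked
 * pending (see source_set_pending() above), so the callback runs once on the next loop
 * iteration.
 *
 *     static int do_later(sd_event_source *s, void *userdata) {
 *             log_debug("deferred work running");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, do_later, NULL);   // floating, fires once
 */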
1599
1600_public_ int sd_event_add_post(
1601 sd_event *e,
1602 sd_event_source **ret,
1603 sd_event_handler_t callback,
1604 void *userdata) {
1605
1606 _cleanup_(source_freep) sd_event_source *s = NULL;
1607 int r;
1608
1609 assert_return(e, -EINVAL);
1610 assert_return(e = event_resolve(e), -ENOPKG);
1611 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1612 assert_return(!event_pid_changed(e), -ECHILD);
1613
1614 if (!callback)
1615 callback = generic_exit_callback;
1616
1617 s = source_new(e, !ret, SOURCE_POST);
1618 if (!s)
1619 return -ENOMEM;
1620
1621 s->post.callback = callback;
1622 s->userdata = userdata;
1623 s->enabled = SD_EVENT_ON;
1624
1625 r = set_ensure_put(&e->post_sources, NULL, s);
1626 if (r < 0)
1627 return r;
1628 assert(r > 0);
1629
1630 if (ret)
1631 *ret = s;
1632 TAKE_PTR(s);
1633
1634 return 0;
1635}
1636
1637_public_ int sd_event_add_exit(
1638 sd_event *e,
1639 sd_event_source **ret,
1640 sd_event_handler_t callback,
1641 void *userdata) {
1642
1643 _cleanup_(source_freep) sd_event_source *s = NULL;
1644 int r;
1645
1646 assert_return(e, -EINVAL);
1647 assert_return(e = event_resolve(e), -ENOPKG);
1648 assert_return(callback, -EINVAL);
1649 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1650 assert_return(!event_pid_changed(e), -ECHILD);
1651
1652 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1653 if (r < 0)
1654 return r;
1655
1656 s = source_new(e, !ret, SOURCE_EXIT);
1657 if (!s)
1658 return -ENOMEM;
1659
1660 s->exit.callback = callback;
1661 s->userdata = userdata;
1662 s->exit.prioq_index = PRIOQ_IDX_NULL;
1663 s->enabled = SD_EVENT_ONESHOT;
1664
1665 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1666 if (r < 0)
1667 return r;
1668
1669 if (ret)
1670 *ret = s;
1671 TAKE_PTR(s);
1672
1673 return 0;
1674}
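/* Example (sketch): exit sources are dispatched once sd_event_exit() has been requested,
 * ordered by priority as per exit_prioq_compare() above; useful for teardown work.
 *
 *     static int on_loop_exit(sd_event_source *s, void *userdata) {
 *             log_debug("event loop exiting, cleaning up");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_loop_exit, NULL);
 */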
1675
1676static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1677 assert(e);
1678
1679 if (!d)
1680 return;
1681
1682 assert(hashmap_isempty(d->inodes));
1683 assert(hashmap_isempty(d->wd));
1684
1685 if (d->buffer_filled > 0)
1686 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1687
1688 hashmap_free(d->inodes);
1689 hashmap_free(d->wd);
1690
1691 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1692
1693 if (d->fd >= 0) {
1694 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1695 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1696
1697 safe_close(d->fd);
1698 }
1699 free(d);
1700}
1701
1702static int event_make_inotify_data(
1703 sd_event *e,
1704 int64_t priority,
1705 struct inotify_data **ret) {
1706
1707 _cleanup_close_ int fd = -1;
1708 struct inotify_data *d;
1709 int r;
1710
1711 assert(e);
1712
1713 d = hashmap_get(e->inotify_data, &priority);
1714 if (d) {
1715 if (ret)
1716 *ret = d;
1717 return 0;
1718 }
1719
1720 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1721 if (fd < 0)
1722 return -errno;
1723
1724 fd = fd_move_above_stdio(fd);
1725
1726 d = new(struct inotify_data, 1);
1727 if (!d)
1728 return -ENOMEM;
1729
1730 *d = (struct inotify_data) {
1731 .wakeup = WAKEUP_INOTIFY_DATA,
1732 .fd = TAKE_FD(fd),
1733 .priority = priority,
1734 };
1735
1736 r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
1737 if (r < 0) {
1738 d->fd = safe_close(d->fd);
1739 free(d);
1740 return r;
1741 }
1742
1743 struct epoll_event ev = {
1744 .events = EPOLLIN,
1745 .data.ptr = d,
1746 };
1747
1748 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1749 r = -errno;
1750 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1751 * remove the fd from the epoll first, which we don't want as we couldn't
1752 * add it in the first place. */
1753 event_free_inotify_data(e, d);
1754 return r;
1755 }
1756
1757 if (ret)
1758 *ret = d;
1759
1760 return 1;
1761}
1762
1763static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1764 int r;
1765
1766 assert(x);
1767 assert(y);
1768
1769 r = CMP(x->dev, y->dev);
1770 if (r != 0)
1771 return r;
1772
1773 return CMP(x->ino, y->ino);
1774}
1775
1776static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1777 assert(d);
1778
1779 siphash24_compress(&d->dev, sizeof(d->dev), state);
1780 siphash24_compress(&d->ino, sizeof(d->ino), state);
1781}
1782
1783DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1784
1785static void event_free_inode_data(
1786 sd_event *e,
1787 struct inode_data *d) {
1788
1789 assert(e);
1790
1791 if (!d)
1792 return;
1793
1794 assert(!d->event_sources);
1795
1796 if (d->fd >= 0) {
1797 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1798 safe_close(d->fd);
1799 }
1800
1801 if (d->inotify_data) {
1802
1803 if (d->wd >= 0) {
1804 if (d->inotify_data->fd >= 0) {
1805 /* So here's a problem. At the time this runs the watch descriptor might already be
1806 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1807 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1808 * likely case to happen. */
1809
1810 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1811 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1812 }
1813
1814 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1815 }
1816
1817 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1818 }
1819
1820 free(d);
1821}
1822
1823static void event_gc_inotify_data(
1824 sd_event *e,
1825 struct inotify_data *d) {
1826
1827 assert(e);
1828
1829 /* GCs the inotify data object if we don't need it anymore. That's the case if we don't want to watch
1830 * any inode with it anymore, which in turn happens if no event source of this priority is interested
1831 * in any inode any longer. That said, we maintain an extra busy counter: if non-zero we'll delay GC
1832 * (under the expectation that the GC is called again once the counter is decremented). */
1833
1834 if (!d)
1835 return;
1836
1837 if (!hashmap_isempty(d->inodes))
1838 return;
1839
1840 if (d->n_busy > 0)
1841 return;
1842
1843 event_free_inotify_data(e, d);
1844}
1845
1846static void event_gc_inode_data(
1847 sd_event *e,
1848 struct inode_data *d) {
1849
1850 struct inotify_data *inotify_data;
1851
1852 assert(e);
1853
1854 if (!d)
1855 return;
1856
1857 if (d->event_sources)
1858 return;
1859
1860 inotify_data = d->inotify_data;
1861 event_free_inode_data(e, d);
1862
1863 event_gc_inotify_data(e, inotify_data);
1864}
1865
1866static int event_make_inode_data(
1867 sd_event *e,
1868 struct inotify_data *inotify_data,
1869 dev_t dev,
1870 ino_t ino,
1871 struct inode_data **ret) {
1872
1873 struct inode_data *d, key;
1874 int r;
1875
1876 assert(e);
1877 assert(inotify_data);
1878
1879 key = (struct inode_data) {
1880 .ino = ino,
1881 .dev = dev,
1882 };
1883
1884 d = hashmap_get(inotify_data->inodes, &key);
1885 if (d) {
1886 if (ret)
1887 *ret = d;
1888
1889 return 0;
1890 }
1891
1892 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1893 if (r < 0)
1894 return r;
1895
1896 d = new(struct inode_data, 1);
1897 if (!d)
1898 return -ENOMEM;
1899
1900 *d = (struct inode_data) {
1901 .dev = dev,
1902 .ino = ino,
1903 .wd = -1,
1904 .fd = -1,
1905 .inotify_data = inotify_data,
1906 };
1907
1908 r = hashmap_put(inotify_data->inodes, d, d);
1909 if (r < 0) {
1910 free(d);
1911 return r;
1912 }
1913
1914 if (ret)
1915 *ret = d;
1916
1917 return 1;
1918}
1919
1920static uint32_t inode_data_determine_mask(struct inode_data *d) {
1921 bool excl_unlink = true;
1922 uint32_t combined = 0;
1923
1924 assert(d);
1925
1926 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1927 * the IN_EXCL_UNLINK flag is ANDed instead.
1928 *
1929 * Note that we add all sources to the mask here, regardless of whether enabled, disabled or oneshot. That's
1930 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1931 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1932 * events we don't care for client-side. */
1933
1934 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1935
1936 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1937 excl_unlink = false;
1938
1939 combined |= s->inotify.mask;
1940 }
1941
1942 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1943}
1944
1945static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1946 uint32_t combined_mask;
1947 int wd, r;
1948
1949 assert(d);
1950 assert(d->fd >= 0);
1951
1952 combined_mask = inode_data_determine_mask(d);
1953
1954 if (d->wd >= 0 && combined_mask == d->combined_mask)
1955 return 0;
1956
1957 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1958 if (r < 0)
1959 return r;
1960
1961 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1962 if (wd < 0)
1963 return -errno;
1964
1965 if (d->wd < 0) {
1966 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1967 if (r < 0) {
1968 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1969 return r;
1970 }
1971
1972 d->wd = wd;
1973
1974 } else if (d->wd != wd) {
1975
1976 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1977 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1978 return -EINVAL;
1979 }
1980
1981 d->combined_mask = combined_mask;
1982 return 1;
1983}
1984
1985static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1986 assert(s);
1987
1988 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1989}
1990
1991static int event_add_inotify_fd_internal(
1992 sd_event *e,
1993 sd_event_source **ret,
1994 int fd,
1995 bool donate,
1996 uint32_t mask,
1997 sd_event_inotify_handler_t callback,
1998 void *userdata) {
1999
2000 _cleanup_close_ int donated_fd = donate ? fd : -1;
2001 _cleanup_(source_freep) sd_event_source *s = NULL;
2002 struct inotify_data *inotify_data = NULL;
2003 struct inode_data *inode_data = NULL;
2004 struct stat st;
2005 int r;
2006
2007 assert_return(e, -EINVAL);
2008 assert_return(e = event_resolve(e), -ENOPKG);
2009 assert_return(fd >= 0, -EBADF);
2010 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2011 assert_return(!event_pid_changed(e), -ECHILD);
2012
2013 if (!callback)
2014 callback = inotify_exit_callback;
2015
2016 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
2017 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
2018 * the user can't use it themselves. */
2019 if (mask & IN_MASK_ADD)
2020 return -EINVAL;
2021
2022 if (fstat(fd, &st) < 0)
2023 return -errno;
2024
2025 s = source_new(e, !ret, SOURCE_INOTIFY);
2026 if (!s)
2027 return -ENOMEM;
2028
2029 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
2030 s->inotify.mask = mask;
2031 s->inotify.callback = callback;
2032 s->userdata = userdata;
2033
2034 /* Allocate an inotify object for this priority, and an inode object within it */
2035 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
2036 if (r < 0)
2037 return r;
2038
2039 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
2040 if (r < 0) {
2041 event_gc_inotify_data(e, inotify_data);
2042 return r;
2043 }
2044
2045 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
2046 * the event source, until then, for which we need the original inode. */
2047 if (inode_data->fd < 0) {
2048 if (donated_fd >= 0)
2049 inode_data->fd = TAKE_FD(donated_fd);
2050 else {
2051 inode_data->fd = fcntl(fd, F_DUPFD_CLOEXEC, 3);
2052 if (inode_data->fd < 0) {
2053 r = -errno;
2054 event_gc_inode_data(e, inode_data);
2055 return r;
2056 }
2057 }
2058
2059 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
2060 }
2061
2062 /* Link our event source to the inode data object */
2063 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
2064 s->inotify.inode_data = inode_data;
2065
2066 /* Actually realize the watch now */
2067 r = inode_data_realize_watch(e, inode_data);
2068 if (r < 0)
2069 return r;
2070
2071 if (ret)
2072 *ret = s;
2073 TAKE_PTR(s);
2074
2075 return 0;
2076}
2077
2078_public_ int sd_event_add_inotify_fd(
2079 sd_event *e,
2080 sd_event_source **ret,
2081 int fd,
2082 uint32_t mask,
2083 sd_event_inotify_handler_t callback,
2084 void *userdata) {
2085
2086 return event_add_inotify_fd_internal(e, ret, fd, /* donate= */ false, mask, callback, userdata);
2087}
2088
2089_public_ int sd_event_add_inotify(
2090 sd_event *e,
2091 sd_event_source **ret,
2092 const char *path,
2093 uint32_t mask,
2094 sd_event_inotify_handler_t callback,
2095 void *userdata) {
2096
2097 sd_event_source *s = NULL; /* avoid false maybe-uninitialized warning */
2098 int fd, r;
2099
2100 assert_return(path, -EINVAL);
2101
2102 fd = open(path, O_PATH|O_CLOEXEC|
2103 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
2104 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
2105 if (fd < 0)
2106 return -errno;
2107
2108 r = event_add_inotify_fd_internal(e, &s, fd, /* donate= */ true, mask, callback, userdata);
2109 if (r < 0)
2110 return r;
2111
2112 (void) sd_event_source_set_description(s, path);
2113
2114 if (ret)
2115 *ret = s;
2116
2117 return r;
2118}
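/* Example (sketch): watch a directory for newly added files. IN_MASK_ADD is refused by
 * event_add_inotify_fd_internal() above; "/run/foo" is a hypothetical path.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify event 0x%x on %s", ev->mask, ev->len > 0 ? ev->name : "?");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &inotify_source, "/run/foo",
 *                              IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 */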
2119
2120static sd_event_source* event_source_free(sd_event_source *s) {
2121 if (!s)
2122 return NULL;
2123
2124 /* Here's a special hack: when we are called from a
2125 * dispatch handler we won't free the event source
2126 * immediately, but we will detach the fd from the
2127 * epoll. This way it is safe for the caller to unref
2128 * the event source and immediately close the fd, but
2129 * we still retain a valid event source object after
2130 * the callback. */
2131
2132 if (s->dispatching) {
2133 if (s->type == SOURCE_IO)
2134 source_io_unregister(s);
2135
2136 source_disconnect(s);
2137 } else
2138 source_free(s);
2139
2140 return NULL;
2141}
2142
2143DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
2144
2145_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
2146 assert_return(s, -EINVAL);
2147 assert_return(!event_pid_changed(s->event), -ECHILD);
2148
2149 return free_and_strdup(&s->description, description);
2150}
2151
2152_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
2153 assert_return(s, -EINVAL);
2154 assert_return(description, -EINVAL);
2155 assert_return(!event_pid_changed(s->event), -ECHILD);
2156
2157 if (!s->description)
2158 return -ENXIO;
2159
2160 *description = s->description;
2161 return 0;
2162}
2163
2164_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
2165 assert_return(s, NULL);
2166
2167 return s->event;
2168}
2169
2170_public_ int sd_event_source_get_pending(sd_event_source *s) {
2171 assert_return(s, -EINVAL);
2172 assert_return(s->type != SOURCE_EXIT, -EDOM);
2173 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2174 assert_return(!event_pid_changed(s->event), -ECHILD);
2175
2176 return s->pending;
2177}
2178
2179_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2180 assert_return(s, -EINVAL);
2181 assert_return(s->type == SOURCE_IO, -EDOM);
2182 assert_return(!event_pid_changed(s->event), -ECHILD);
2183
2184 return s->io.fd;
2185}
2186
2187_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2188 int r;
2189
2190 assert_return(s, -EINVAL);
2191 assert_return(fd >= 0, -EBADF);
2192 assert_return(s->type == SOURCE_IO, -EDOM);
2193 assert_return(!event_pid_changed(s->event), -ECHILD);
2194
2195 if (s->io.fd == fd)
2196 return 0;
2197
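 /* If the source is online, we register the new fd with epoll first and only drop the old registration
  * once that succeeded, so that a failure along the way leaves the original setup untouched. */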
2198 if (event_source_is_offline(s)) {
2199 s->io.fd = fd;
2200 s->io.registered = false;
2201 } else {
2202 int saved_fd;
2203
2204 saved_fd = s->io.fd;
2205 assert(s->io.registered);
2206
2207 s->io.fd = fd;
2208 s->io.registered = false;
2209
2210 r = source_io_register(s, s->enabled, s->io.events);
2211 if (r < 0) {
2212 s->io.fd = saved_fd;
2213 s->io.registered = true;
2214 return r;
2215 }
2216
2217 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2218 }
2219
2220 return 0;
2221}
2222
2223_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2224 assert_return(s, -EINVAL);
2225 assert_return(s->type == SOURCE_IO, -EDOM);
2226
2227 return s->io.owned;
2228}
2229
2230_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2231 assert_return(s, -EINVAL);
2232 assert_return(s->type == SOURCE_IO, -EDOM);
2233
2234 s->io.owned = own;
2235 return 0;
2236}
2237
2238_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2239 assert_return(s, -EINVAL);
2240 assert_return(events, -EINVAL);
2241 assert_return(s->type == SOURCE_IO, -EDOM);
2242 assert_return(!event_pid_changed(s->event), -ECHILD);
2243
2244 *events = s->io.events;
2245 return 0;
2246}
2247
2248_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2249 int r;
2250
2251 assert_return(s, -EINVAL);
2252 assert_return(s->type == SOURCE_IO, -EDOM);
2253 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2254 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2255 assert_return(!event_pid_changed(s->event), -ECHILD);
2256
2257 /* edge-triggered updates are never skipped, so we can reset edges */
2258 if (s->io.events == events && !(events & EPOLLET))
2259 return 0;
2260
2261 r = source_set_pending(s, false);
2262 if (r < 0)
2263 return r;
2264
2265 if (event_source_is_online(s)) {
2266 r = source_io_register(s, s->enabled, events);
2267 if (r < 0)
2268 return r;
2269 }
2270
2271 s->io.events = events;
2272
2273 return 0;
2274}
2275
2276_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2277 assert_return(s, -EINVAL);
2278 assert_return(revents, -EINVAL);
2279 assert_return(s->type == SOURCE_IO, -EDOM);
2280 assert_return(s->pending, -ENODATA);
2281 assert_return(!event_pid_changed(s->event), -ECHILD);
2282
2283 *revents = s->io.revents;
2284 return 0;
2285}
2286
2287_public_ int sd_event_source_get_signal(sd_event_source *s) {
2288 assert_return(s, -EINVAL);
2289 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2290 assert_return(!event_pid_changed(s->event), -ECHILD);
2291
2292 return s->signal.sig;
2293}
2294
2295_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2296 assert_return(s, -EINVAL);
2297 assert_return(!event_pid_changed(s->event), -ECHILD);
2298
2299 *priority = s->priority;
2300 return 0;
2301}
2302
2303_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2304 bool rm_inotify = false, rm_inode = false;
2305 struct inotify_data *new_inotify_data = NULL;
2306 struct inode_data *new_inode_data = NULL;
2307 int r;
2308
2309 assert_return(s, -EINVAL);
2310 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2311 assert_return(!event_pid_changed(s->event), -ECHILD);
2312
2313 if (s->priority == priority)
2314 return 0;
2315
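 /* Changing the priority of an inotify source means moving it to the inotify object of the new priority
  * (we keep one inotify object per priority). We might hence allocate a new inotify object and a new
  * inode object below; the rm_inotify/rm_inode flags record whether we created fresh ones, so that the
  * failure path can free them again. */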
2316 if (s->type == SOURCE_INOTIFY) {
2317 struct inode_data *old_inode_data;
2318
2319 assert(s->inotify.inode_data);
2320 old_inode_data = s->inotify.inode_data;
2321
2322 /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2323 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2324 * events we allow priority changes only until the first following iteration. */
2325 if (old_inode_data->fd < 0)
2326 return -EOPNOTSUPP;
2327
2328 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2329 if (r < 0)
2330 return r;
2331 rm_inotify = r > 0;
2332
2333 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2334 if (r < 0)
2335 goto fail;
2336 rm_inode = r > 0;
2337
2338 if (new_inode_data->fd < 0) {
2339 /* Duplicate the fd for the new inode object if we don't have any yet */
2340 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2341 if (new_inode_data->fd < 0) {
2342 r = -errno;
2343 goto fail;
2344 }
2345
2346 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2347 }
2348
2349 /* Move the event source to the new inode data structure */
2350 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2351 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2352 s->inotify.inode_data = new_inode_data;
2353
2354 /* Now create the new watch */
2355 r = inode_data_realize_watch(s->event, new_inode_data);
2356 if (r < 0) {
2357 /* Move it back */
2358 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2359 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2360 s->inotify.inode_data = old_inode_data;
2361 goto fail;
2362 }
2363
2364 s->priority = priority;
2365
2366 event_gc_inode_data(s->event, old_inode_data);
2367
2368 } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
2369 struct signal_data *old, *d;
2370
2371 /* Move us from the signalfd belonging to the old
2372 * priority to the signalfd of the new priority */
2373
2374 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2375
2376 s->priority = priority;
2377
2378 r = event_make_signal_data(s->event, s->signal.sig, &d);
2379 if (r < 0) {
2380 s->priority = old->priority;
2381 return r;
2382 }
2383
2384 event_unmask_signal_data(s->event, old, s->signal.sig);
2385 } else
2386 s->priority = priority;
2387
2388 event_source_pp_prioq_reshuffle(s);
2389
2390 if (s->type == SOURCE_EXIT)
2391 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2392
2393 return 0;
2394
2395fail:
2396 if (rm_inode)
2397 event_free_inode_data(s->event, new_inode_data);
2398
2399 if (rm_inotify)
2400 event_free_inotify_data(s->event, new_inotify_data);
2401
2402 return r;
2403}
2404
2405_public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
2406 assert_return(s, -EINVAL);
2407 assert_return(!event_pid_changed(s->event), -ECHILD);
2408
2409 if (ret)
2410 *ret = s->enabled;
2411
2412 return s->enabled != SD_EVENT_OFF;
2413}
2414
2415static int event_source_offline(
2416 sd_event_source *s,
2417 int enabled,
2418 bool ratelimited) {
2419
2420 bool was_offline;
2421 int r;
2422
2423 assert(s);
2424 assert(enabled == SD_EVENT_OFF || ratelimited);
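
 /* "Offline" covers two distinct cases here: the source was explicitly disabled, or it entered its rate
  * limit window. In both cases it is deregistered from the kernel-facing objects below, while the
  * enabled/ratelimited fields keep track of which of the two applies. */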
2425
2426 /* Unset the pending flag when this event source is disabled */
2427 if (s->enabled != SD_EVENT_OFF &&
2428 enabled == SD_EVENT_OFF &&
2429 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2430 r = source_set_pending(s, false);
2431 if (r < 0)
2432 return r;
2433 }
2434
2435 was_offline = event_source_is_offline(s);
2436 s->enabled = enabled;
2437 s->ratelimited = ratelimited;
2438
2439 switch (s->type) {
2440
2441 case SOURCE_IO:
2442 source_io_unregister(s);
2443 break;
2444
2445 case SOURCE_SIGNAL:
2446 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2447 break;
2448
2449 case SOURCE_CHILD:
2450 if (!was_offline) {
2451 assert(s->event->n_online_child_sources > 0);
2452 s->event->n_online_child_sources--;
2453 }
2454
2455 if (EVENT_SOURCE_WATCH_PIDFD(s))
2456 source_child_pidfd_unregister(s);
2457 else
2458 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2459 break;
2460
2461 case SOURCE_EXIT:
2462 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2463 break;
2464
2465 case SOURCE_TIME_REALTIME:
2466 case SOURCE_TIME_BOOTTIME:
2467 case SOURCE_TIME_MONOTONIC:
2468 case SOURCE_TIME_REALTIME_ALARM:
2469 case SOURCE_TIME_BOOTTIME_ALARM:
2470 case SOURCE_DEFER:
2471 case SOURCE_POST:
2472 case SOURCE_INOTIFY:
2473 break;
2474
2475 default:
2476 assert_not_reached();
2477 }
2478
2479 /* Always reshuffle the time prioq, as the ratelimited flag may have changed. */
2480 event_source_time_prioq_reshuffle(s);
2481
2482 return 1;
2483}
2484
2485static int event_source_online(
2486 sd_event_source *s,
2487 int enabled,
2488 bool ratelimited) {
2489
2490 bool was_online;
2491 int r;
2492
2493 assert(s);
2494 assert(enabled != SD_EVENT_OFF || !ratelimited);
2495
2496 /* Unset the pending flag when this event source is enabled */
2497 if (s->enabled == SD_EVENT_OFF &&
2498 enabled != SD_EVENT_OFF &&
2499 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2500 r = source_set_pending(s, false);
2501 if (r < 0)
2502 return r;
2503 }
2504
2505 /* Are we really ready for onlining? */
2506 if (enabled == SD_EVENT_OFF || ratelimited) {
2507 /* Nope, we are not ready for onlining, so just update the precise state and exit */
2508 s->enabled = enabled;
2509 s->ratelimited = ratelimited;
2510 return 0;
2511 }
2512
2513 was_online = event_source_is_online(s);
2514
2515 switch (s->type) {
2516 case SOURCE_IO:
2517 r = source_io_register(s, enabled, s->io.events);
2518 if (r < 0)
2519 return r;
2520 break;
2521
2522 case SOURCE_SIGNAL:
2523 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2524 if (r < 0) {
2525 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2526 return r;
2527 }
2528
2529 break;
2530
2531 case SOURCE_CHILD:
2532 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2533 /* yes, we have pidfd */
2534
2535 r = source_child_pidfd_register(s, enabled);
2536 if (r < 0)
2537 return r;
2538 } else {
2539 /* no pidfd, or we are watching for something other than WEXITED */
2540
2541 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2542 if (r < 0) {
2543 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2544 return r;
2545 }
2546 }
2547
2548 if (!was_online)
2549 s->event->n_online_child_sources++;
2550 break;
2551
2552 case SOURCE_TIME_REALTIME:
2553 case SOURCE_TIME_BOOTTIME:
2554 case SOURCE_TIME_MONOTONIC:
2555 case SOURCE_TIME_REALTIME_ALARM:
2556 case SOURCE_TIME_BOOTTIME_ALARM:
2557 case SOURCE_EXIT:
2558 case SOURCE_DEFER:
2559 case SOURCE_POST:
2560 case SOURCE_INOTIFY:
2561 break;
2562
2563 default:
2564 assert_not_reached();
2565 }
2566
2567 s->enabled = enabled;
2568 s->ratelimited = ratelimited;
2569
2570 /* Non-failing operations below */
2571 if (s->type == SOURCE_EXIT)
2572 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2573
2574 /* Always reshuffle the time prioq, as the ratelimited flag may have changed. */
2575 event_source_time_prioq_reshuffle(s);
2576
2577 return 1;
2578}
2579
2580_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2581 int r;
2582
2583 assert_return(s, -EINVAL);
2584 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2585 assert_return(!event_pid_changed(s->event), -ECHILD);
2586
2587 /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
2588 if (s->event->state == SD_EVENT_FINISHED)
2589 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2590
2591 if (s->enabled == m) /* No change? */
2592 return 0;
2593
2594 if (m == SD_EVENT_OFF)
2595 r = event_source_offline(s, m, s->ratelimited);
2596 else {
2597 if (s->enabled != SD_EVENT_OFF) {
2598 /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
2599 * event source is already enabled after all. */
2600 s->enabled = m;
2601 return 0;
2602 }
2603
2604 r = event_source_online(s, m, s->ratelimited);
2605 }
2606 if (r < 0)
2607 return r;
2608
2609 event_source_pp_prioq_reshuffle(s);
2610 return 0;
2611}
2612
2613_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2614 assert_return(s, -EINVAL);
2615 assert_return(usec, -EINVAL);
2616 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2617 assert_return(!event_pid_changed(s->event), -ECHILD);
2618
2619 *usec = s->time.next;
2620 return 0;
2621}
2622
2623_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2624 int r;
2625
2626 assert_return(s, -EINVAL);
2627 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2628 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2629 assert_return(!event_pid_changed(s->event), -ECHILD);
2630
2631 r = source_set_pending(s, false);
2632 if (r < 0)
2633 return r;
2634
2635 s->time.next = usec;
2636
2637 event_source_time_prioq_reshuffle(s);
2638 return 0;
2639}
2640
2641_public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2642 usec_t t;
2643 int r;
2644
2645 assert_return(s, -EINVAL);
2646 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2647
2648 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2649 if (r < 0)
2650 return r;
2651
2652 usec = usec_add(t, usec);
2653 if (usec == USEC_INFINITY)
2654 return -EOVERFLOW;
2655
2656 return sd_event_source_set_time(s, usec);
2657}
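
/* Illustrative only: to (re)arm a timer source five seconds from now on its own clock, one could do:
 *
 *     (void) sd_event_source_set_time_relative(s, 5 * USEC_PER_SEC);
 *     (void) sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *
 * i.e. the relative variant is just the absolute one with the current clock value added in. */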
2658
2659_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2660 assert_return(s, -EINVAL);
2661 assert_return(usec, -EINVAL);
2662 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2663 assert_return(!event_pid_changed(s->event), -ECHILD);
2664
2665 *usec = s->time.accuracy;
2666 return 0;
2667}
2668
2669_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2670 int r;
2671
2672 assert_return(s, -EINVAL);
2673 assert_return(usec != UINT64_MAX, -EINVAL);
2674 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2675 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2676 assert_return(!event_pid_changed(s->event), -ECHILD);
2677
2678 r = source_set_pending(s, false);
2679 if (r < 0)
2680 return r;
2681
2682 if (usec == 0)
2683 usec = DEFAULT_ACCURACY_USEC;
2684
2685 s->time.accuracy = usec;
2686
2687 event_source_time_prioq_reshuffle(s);
2688 return 0;
2689}
2690
2691_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2692 assert_return(s, -EINVAL);
2693 assert_return(clock, -EINVAL);
2694 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2695 assert_return(!event_pid_changed(s->event), -ECHILD);
2696
2697 *clock = event_source_type_to_clock(s->type);
2698 return 0;
2699}
2700
2701_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2702 assert_return(s, -EINVAL);
2703 assert_return(pid, -EINVAL);
2704 assert_return(s->type == SOURCE_CHILD, -EDOM);
2705 assert_return(!event_pid_changed(s->event), -ECHILD);
2706
2707 *pid = s->child.pid;
2708 return 0;
2709}
2710
2711_public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2712 assert_return(s, -EINVAL);
2713 assert_return(s->type == SOURCE_CHILD, -EDOM);
2714 assert_return(!event_pid_changed(s->event), -ECHILD);
2715
2716 if (s->child.pidfd < 0)
2717 return -EOPNOTSUPP;
2718
2719 return s->child.pidfd;
2720}
2721
2722_public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2723 assert_return(s, -EINVAL);
2724 assert_return(s->type == SOURCE_CHILD, -EDOM);
2725 assert_return(!event_pid_changed(s->event), -ECHILD);
2726 assert_return(SIGNAL_VALID(sig), -EINVAL);
2727
2728 /* If we have already seen an indication that the process exited, refuse sending a signal early. This way we
2729 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2730 * available. */
2731 if (s->child.exited)
2732 return -ESRCH;
2733
2734 if (s->child.pidfd >= 0) {
2735 siginfo_t copy;
2736
2737 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2738 * structure here */
2739 if (si)
2740 copy = *si;
2741
2742 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2743 /* Let's propagate the error only if the system call is not implemented or prohibited */
2744 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2745 return -errno;
2746 } else
2747 return 0;
2748 }
2749
2750 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2751 * this here. */
2752 if (flags != 0)
2753 return -EOPNOTSUPP;
2754
2755 if (si) {
2756 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2757 siginfo_t copy = *si;
2758
2759 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2760 return -errno;
2761 } else if (kill(s->child.pid, sig) < 0)
2762 return -errno;
2763
2764 return 0;
2765}
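
/* Summary of the fallback chain above: prefer pidfd_send_signal() when a pidfd is available (immune to
 * PID reuse), fall back to rt_sigqueueinfo() when a siginfo_t was supplied, and to plain kill()
 * otherwise. Flags are only honoured on the pidfd path. */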
2766
2767_public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2768 assert_return(s, -EINVAL);
2769 assert_return(s->type == SOURCE_CHILD, -EDOM);
2770
2771 if (s->child.pidfd < 0)
2772 return -EOPNOTSUPP;
2773
2774 return s->child.pidfd_owned;
2775}
2776
2777_public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2778 assert_return(s, -EINVAL);
2779 assert_return(s->type == SOURCE_CHILD, -EDOM);
2780
2781 if (s->child.pidfd < 0)
2782 return -EOPNOTSUPP;
2783
2784 s->child.pidfd_owned = own;
2785 return 0;
2786}
2787
2788_public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2789 assert_return(s, -EINVAL);
2790 assert_return(s->type == SOURCE_CHILD, -EDOM);
2791
2792 return s->child.process_owned;
2793}
2794
2795_public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2796 assert_return(s, -EINVAL);
2797 assert_return(s->type == SOURCE_CHILD, -EDOM);
2798
2799 s->child.process_owned = own;
2800 return 0;
2801}
2802
2803_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2804 assert_return(s, -EINVAL);
2805 assert_return(mask, -EINVAL);
2806 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2807 assert_return(!event_pid_changed(s->event), -ECHILD);
2808
2809 *mask = s->inotify.mask;
2810 return 0;
2811}
2812
2813_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2814 int r;
2815
2816 assert_return(s, -EINVAL);
2817 assert_return(s->type != SOURCE_EXIT, -EDOM);
2818 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2819 assert_return(!event_pid_changed(s->event), -ECHILD);
2820
2821 if (s->prepare == callback)
2822 return 0;
2823
2824 if (callback && s->prepare) {
2825 s->prepare = callback;
2826 return 0;
2827 }
2828
2829 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2830 if (r < 0)
2831 return r;
2832
2833 s->prepare = callback;
2834
2835 if (callback) {
2836 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2837 if (r < 0)
2838 return r;
2839 } else
2840 prioq_remove(s->event->prepare, s, &s->prepare_index);
2841
2842 return 0;
2843}
2844
2845_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2846 assert_return(s, NULL);
2847
2848 return s->userdata;
2849}
2850
2851_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2852 void *ret;
2853
2854 assert_return(s, NULL);
2855
2856 ret = s->userdata;
2857 s->userdata = userdata;
2858
2859 return ret;
2860}
2861
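/* Rate limiting scheme: when a source fires more often than its configured interval/burst allows (see
 * sd_event_source_set_ratelimit()), it is taken offline and parked in the CLOCK_MONOTONIC prioq until
 * the end of its window; process_timer() then brings it back online, optionally running the expiry
 * callback. */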
2862static int event_source_enter_ratelimited(sd_event_source *s) {
2863 int r;
2864
2865 assert(s);
2866
2867 /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, keyed to
2868 * the end of the rate limit time window, much as if it were a timer event source. */
2869
2870 if (s->ratelimited)
2871 return 0; /* Already ratelimited, this is a NOP hence */
2872
2873 /* Make sure we can install a CLOCK_MONOTONIC event further down. */
2874 r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
2875 if (r < 0)
2876 return r;
2877
2878 /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
2879 * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
2880 * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
2881 if (EVENT_SOURCE_IS_TIME(s->type))
2882 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2883
2884 /* Now, let's add the event source to the monotonic clock instead */
2885 r = event_source_time_prioq_put(s, &s->event->monotonic);
2886 if (r < 0)
2887 goto fail;
2888
2889 /* And let's take the event source officially offline */
2890 r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
2891 if (r < 0) {
2892 event_source_time_prioq_remove(s, &s->event->monotonic);
2893 goto fail;
2894 }
2895
2896 event_source_pp_prioq_reshuffle(s);
2897
2898 log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
2899 return 0;
2900
2901fail:
2902 /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
2903 * space for it should already be allocated. */
2904 if (EVENT_SOURCE_IS_TIME(s->type))
2905 assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
2906
2907 return r;
2908}
2909
2910static int event_source_leave_ratelimit(sd_event_source *s, bool run_callback) {
2911 int r;
2912
2913 assert(s);
2914
2915 if (!s->ratelimited)
2916 return 0;
2917
2918 /* Let's take the event source out of the monotonic prioq first. */
2919 event_source_time_prioq_remove(s, &s->event->monotonic);
2920
2921 /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
2922 if (EVENT_SOURCE_IS_TIME(s->type)) {
2923 r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
2924 if (r < 0)
2925 goto fail;
2926 }
2927
2928 /* Let's try to take it online again. */
2929 r = event_source_online(s, s->enabled, /* ratelimited= */ false);
2930 if (r < 0) {
2931 /* Do something roughly sensible when this failed: undo the two prioq ops above */
2932 if (EVENT_SOURCE_IS_TIME(s->type))
2933 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2934
2935 goto fail;
2936 }
2937
2938 event_source_pp_prioq_reshuffle(s);
2939 ratelimit_reset(&s->rate_limit);
2940
2941 log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
2942
2943 if (run_callback && s->ratelimit_expire_callback) {
2944 s->dispatching = true;
2945 r = s->ratelimit_expire_callback(s, s->userdata);
2946 s->dispatching = false;
2947
2948 if (r < 0) {
2949 log_debug_errno(r, "Ratelimit expiry callback of event source %s (type %s) returned error, %s: %m",
2950 strna(s->description),
2951 event_source_type_to_string(s->type),
2952 s->exit_on_failure ? "exiting" : "disabling");
2953
2954 if (s->exit_on_failure)
2955 (void) sd_event_exit(s->event, r);
2956 }
2957
2958 if (s->n_ref == 0)
2959 source_free(s);
2960 else if (r < 0)
2961 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
2962
2963 return 1;
2964 }
2965
2966 return 0;
2967
2968fail:
2969 /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
2970 * simply put it back into it; maybe we can then process it more successfully next iteration. */
2971 assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
2972
2973 return r;
2974}
2975
2976static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2977 usec_t c;
2978 assert(e);
2979 assert(a <= b);
2980
2981 if (a <= 0)
2982 return 0;
2983 if (a >= USEC_INFINITY)
2984 return USEC_INFINITY;
2985
2986 if (b <= a + 1)
2987 return a;
2988
2989 initialize_perturb(e);
2990
2991 /*
2992 Find a good time to wake up again between times a and b. We
2993 have two goals here:
2994
2995 a) We want to wake up as seldom as possible, hence prefer
2996 later times over earlier times.
2997
2998 b) But if we have to wake up, then let's make sure to
2999 dispatch as much as possible on the entire system.
3000
3001 We implement this by waking up everywhere at the same time
3002 within any given minute if we can, synchronised via the
3003 perturbation value determined from the boot ID. If we can't,
3004 then we try to find the same spot in every 10s, then 1s and
3005 then 250ms steps. Otherwise, we pick the last possible time
3006 to wake up.
3007 */
3008
3009 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
3010 if (c >= b) {
3011 if (_unlikely_(c < USEC_PER_MINUTE))
3012 return b;
3013
3014 c -= USEC_PER_MINUTE;
3015 }
3016
3017 if (c >= a)
3018 return c;
3019
3020 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
3021 if (c >= b) {
3022 if (_unlikely_(c < USEC_PER_SEC*10))
3023 return b;
3024
3025 c -= USEC_PER_SEC*10;
3026 }
3027
3028 if (c >= a)
3029 return c;
3030
3031 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
3032 if (c >= b) {
3033 if (_unlikely_(c < USEC_PER_SEC))
3034 return b;
3035
3036 c -= USEC_PER_SEC;
3037 }
3038
3039 if (c >= a)
3040 return c;
3041
3042 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
3043 if (c >= b) {
3044 if (_unlikely_(c < USEC_PER_MSEC*250))
3045 return b;
3046
3047 c -= USEC_PER_MSEC*250;
3048 }
3049
3050 if (c >= a)
3051 return c;
3052
3053 return b;
3054}
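
/* Worked example (illustrative numbers): with a = 12.7s, b = 17.2s and a per-boot perturbation of 0.4s,
 * the minute and 10s granularities both yield candidates below 'a', but the 1s granularity gives
 * 17.4s, stepped back once to 16.4s, which lies within [a, b] and is returned. All event loops on the
 * system thus converge on the same 0.4s sub-second offset and tend to wake up together. */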
3055
3056static int event_arm_timer(
3057 sd_event *e,
3058 struct clock_data *d) {
3059
3060 struct itimerspec its = {};
3061 sd_event_source *a, *b;
3062 usec_t t;
3063
3064 assert(e);
3065 assert(d);
3066
3067 if (!d->needs_rearm)
3068 return 0;
3069
3070 d->needs_rearm = false;
3071
3072 a = prioq_peek(d->earliest);
3073 assert(!a || EVENT_SOURCE_USES_TIME_PRIOQ(a->type));
3074 if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
3075
3076 if (d->fd < 0)
3077 return 0;
3078
3079 if (d->next == USEC_INFINITY)
3080 return 0;
3081
3082 /* disarm */
3083 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3084 return -errno;
3085
3086 d->next = USEC_INFINITY;
3087 return 0;
3088 }
3089
3090 b = prioq_peek(d->latest);
3091 assert(!b || EVENT_SOURCE_USES_TIME_PRIOQ(b->type));
3092 assert(b && b->enabled != SD_EVENT_OFF);
3093
3094 t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
3095 if (d->next == t)
3096 return 0;
3097
3098 assert_se(d->fd >= 0);
3099
3100 if (t == 0) {
3101 /* We don't want to disarm here, we just mean some time looooong ago. */
3102 its.it_value.tv_sec = 0;
3103 its.it_value.tv_nsec = 1;
3104 } else
3105 timespec_store(&its.it_value, t);
3106
3107 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3108 return -errno;
3109
3110 d->next = t;
3111 return 0;
3112}
3113
3114static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
3115 assert(e);
3116 assert(s);
3117 assert(s->type == SOURCE_IO);
3118
3119 /* If the event source was already pending, we just OR in the
3120 * new revents, otherwise we reset the value. The ORing is
3121 * necessary to handle EPOLLONESHOT events properly where
3122 * readability might happen independently of writability, and
3123 * we need to keep track of both */
3124
3125 if (s->pending)
3126 s->io.revents |= revents;
3127 else
3128 s->io.revents = revents;
3129
3130 return source_set_pending(s, true);
3131}
3132
3133static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
3134 uint64_t x;
3135 ssize_t ss;
3136
3137 assert(e);
3138 assert(fd >= 0);
3139
3140 assert_return(events == EPOLLIN, -EIO);
3141
3142 ss = read(fd, &x, sizeof(x));
3143 if (ss < 0) {
3144 if (ERRNO_IS_TRANSIENT(errno))
3145 return 0;
3146
3147 return -errno;
3148 }
3149
3150 if (_unlikely_(ss != sizeof(x)))
3151 return -EIO;
3152
3153 if (next)
3154 *next = USEC_INFINITY;
3155
3156 return 0;
3157}
3158
3159static int process_timer(
3160 sd_event *e,
3161 usec_t n,
3162 struct clock_data *d) {
3163
3164 sd_event_source *s;
3165 bool callback_invoked = false;
3166 int r;
3167
3168 assert(e);
3169 assert(d);
3170
3171 for (;;) {
3172 s = prioq_peek(d->earliest);
3173 assert(!s || EVENT_SOURCE_USES_TIME_PRIOQ(s->type));
3174
3175 if (!s || time_event_source_next(s) > n)
3176 break;
3177
3178 if (s->ratelimited) {
3179 /* This is an event source whose ratelimit window has ended. Let's turn it on
3180 * again. */
3181 assert(s->ratelimited);
3182
3183 r = event_source_leave_ratelimit(s, /* run_callback */ true);
3184 if (r < 0)
3185 return r;
3186 else if (r == 1)
3187 callback_invoked = true;
3188
3189 continue;
3190 }
3191
3192 if (s->enabled == SD_EVENT_OFF || s->pending)
3193 break;
3194
3195 r = source_set_pending(s, true);
3196 if (r < 0)
3197 return r;
3198
3199 event_source_time_prioq_reshuffle(s);
3200 }
3201
3202 return callback_invoked;
3203}
3204
3205static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3206 int64_t min_priority = threshold;
3207 bool something_new = false;
3208 sd_event_source *s;
3209 int r;
3210
3211 assert(e);
3212 assert(ret_min_priority);
3213
3214 if (!e->need_process_child) {
3215 *ret_min_priority = min_priority;
3216 return 0;
3217 }
3218
3219 e->need_process_child = false;
3220
3221 /*
3222 So, this is ugly. We iteratively invoke waitid() with P_PID
3223 + WNOHANG for each PID we wait for, instead of using
3224 P_ALL. This is because we only want to get child
3225 information of very specific child processes, and not all
3226 of them. We might not have processed the SIGCHLD event of a
3227 previous invocation and we don't want to maintain an
3228 unbounded *per-child* event queue, hence we really don't
3229 want anything flushed out of the kernel's queue that we
3230 don't care about. Since this is O(n) this means that if you
3231 have a lot of processes you probably want to handle SIGCHLD
3232 yourself.
3233
3234 We do not reap the children here (by using WNOWAIT), this
3235 is only done after the event source is dispatched so that
3236 the callback still sees the process as a zombie.
3237 */
3238
3239 HASHMAP_FOREACH(s, e->child_sources) {
3240 assert(s->type == SOURCE_CHILD);
3241
3242 if (s->priority > threshold)
3243 continue;
3244
3245 if (s->pending)
3246 continue;
3247
3248 if (event_source_is_offline(s))
3249 continue;
3250
3251 if (s->child.exited)
3252 continue;
3253
3254 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
3255 continue;
3256
3257 zero(s->child.siginfo);
3258 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
3259 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
3260 return negative_errno();
3261
3262 if (s->child.siginfo.si_pid != 0) {
3263 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3264
3265 if (zombie)
3266 s->child.exited = true;
3267
3268 if (!zombie && (s->child.options & WEXITED)) {
3269 /* If the child isn't dead then let's
3270 * immediately remove the state change
3271 * from the queue, since there's no
3272 * benefit in leaving it queued */
3273
3274 assert(s->child.options & (WSTOPPED|WCONTINUED));
3275 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
3276 }
3277
3278 r = source_set_pending(s, true);
3279 if (r < 0)
3280 return r;
3281 if (r > 0) {
3282 something_new = true;
3283 min_priority = MIN(min_priority, s->priority);
3284 }
3285 }
3286 }
3287
3288 *ret_min_priority = min_priority;
3289 return something_new;
3290}
3291
3292static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
3293 assert(e);
3294 assert(s);
3295 assert(s->type == SOURCE_CHILD);
3296
3297 if (s->pending)
3298 return 0;
3299
3300 if (event_source_is_offline(s))
3301 return 0;
3302
3303 if (!EVENT_SOURCE_WATCH_PIDFD(s))
3304 return 0;
3305
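 /* Note the WNOWAIT below: the child stays reapable, so that the user callback still sees it as a
  * zombie; the final reaping happens in source_dispatch() after the callback has run. */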
3306 zero(s->child.siginfo);
3307 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3308 return -errno;
3309
3310 if (s->child.siginfo.si_pid == 0)
3311 return 0;
3312
3313 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
3314 s->child.exited = true;
3315
3316 return source_set_pending(s, true);
3317}
3318
3319static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
3320 int r;
3321
3322 assert(e);
3323 assert(d);
3324 assert_return(events == EPOLLIN, -EIO);
3325 assert(min_priority);
3326
3327 /* If there's a signal queued on this priority and SIGCHLD is
3328 on this priority too, then make sure to recheck the
3329 children we watch. This is because we only ever dequeue
3330 the first signal per priority, and if SIGCHLD is enqueued
3331 behind the one we dequeue we wouldn't notice it, yet we
3332 might have higher priority children we care about, hence we
3333 need to check that explicitly. */
3334
3335 if (sigismember(&d->sigset, SIGCHLD))
3336 e->need_process_child = true;
3337
3338 /* If there's already an event source pending for this
3339 * priority we don't read another */
3340 if (d->current)
3341 return 0;
3342
3343 for (;;) {
3344 struct signalfd_siginfo si;
3345 ssize_t n;
3346 sd_event_source *s = NULL;
3347
3348 n = read(d->fd, &si, sizeof(si));
3349 if (n < 0) {
3350 if (ERRNO_IS_TRANSIENT(errno))
3351 return 0;
3352
3353 return -errno;
3354 }
3355
3356 if (_unlikely_(n != sizeof(si)))
3357 return -EIO;
3358
3359 assert(SIGNAL_VALID(si.ssi_signo));
3360
3361 if (e->signal_sources)
3362 s = e->signal_sources[si.ssi_signo];
3363 if (!s)
3364 continue;
3365 if (s->pending)
3366 continue;
3367
3368 s->signal.siginfo = si;
3369 d->current = s;
3370
3371 r = source_set_pending(s, true);
3372 if (r < 0)
3373 return r;
3374 if (r > 0 && *min_priority >= s->priority) {
3375 *min_priority = s->priority;
3376 return 1; /* an event source with smaller priority is queued. */
3377 }
3378
3379 return 0;
3380 }
3381}
3382
3383static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
3384 ssize_t n;
3385
3386 assert(e);
3387 assert(d);
3388
3389 assert_return(revents == EPOLLIN, -EIO);
3390
3391 /* If there's already an event source pending for this priority, don't read another */
3392 if (d->n_pending > 0)
3393 return 0;
3394
3395 /* Is the read buffer non-empty? If so, let's not read more */
3396 if (d->buffer_filled > 0)
3397 return 0;
3398
3399 if (d->priority > threshold)
3400 return 0;
3401
3402 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3403 if (n < 0) {
3404 if (ERRNO_IS_TRANSIENT(errno))
3405 return 0;
3406
3407 return -errno;
3408 }
3409
3410 assert(n > 0);
3411 d->buffer_filled = (size_t) n;
3412 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3413
3414 return 1;
3415}
3416
3417static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3418 assert(e);
3419 assert(d);
3420 assert(sz <= d->buffer_filled);
3421
3422 if (sz == 0)
3423 return;
3424
3425 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3426 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3427 d->buffer_filled -= sz;
3428
3429 if (d->buffer_filled == 0)
3430 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3431}
3432
3433static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3434 int r;
3435
3436 assert(e);
3437 assert(d);
3438
3439 /* If there's already an event source pending for this priority, don't read another */
3440 if (d->n_pending > 0)
3441 return 0;
3442
3443 while (d->buffer_filled > 0) {
3444 size_t sz;
3445
3446 /* Let's validate that the event structures are complete */
3447 if (d->buffer_filled < offsetof(struct inotify_event, name))
3448 return -EIO;
3449
3450 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3451 if (d->buffer_filled < sz)
3452 return -EIO;
3453
3454 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3455 struct inode_data *inode_data;
3456
3457 /* The queue overran, let's pass this event to all event sources connected to this inotify
3458 * object */
3459
3460 HASHMAP_FOREACH(inode_data, d->inodes)
3461 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3462
3463 if (event_source_is_offline(s))
3464 continue;
3465
3466 r = source_set_pending(s, true);
3467 if (r < 0)
3468 return r;
3469 }
3470 } else {
3471 struct inode_data *inode_data;
3472
3473 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3474 * our watch descriptor table. */
3475 if (d->buffer.ev.mask & IN_IGNORED) {
3476
3477 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3478 if (!inode_data) {
3479 event_inotify_data_drop(e, d, sz);
3480 continue;
3481 }
3482
3483 /* The watch descriptor was removed by the kernel, let's drop it here too */
3484 inode_data->wd = -1;
3485 } else {
3486 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3487 if (!inode_data) {
3488 event_inotify_data_drop(e, d, sz);
3489 continue;
3490 }
3491 }
3492
3493 /* Trigger all event sources that are interested in these events. Also trigger all event
3494 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3495 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3496
3497 if (event_source_is_offline(s))
3498 continue;
3499
3500 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3501 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3502 continue;
3503
3504 r = source_set_pending(s, true);
3505 if (r < 0)
3506 return r;
3507 }
3508 }
3509
3510 /* Something pending now? If so, let's finish, otherwise let's process more of the buffer. */
3511 if (d->n_pending > 0)
3512 return 1;
3513 }
3514
3515 return 0;
3516}
3517
3518static int process_inotify(sd_event *e) {
3519 int r, done = 0;
3520
3521 assert(e);
3522
3523 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3524 r = event_inotify_data_process(e, d);
3525 if (r < 0)
3526 return r;
3527 if (r > 0)
3528 done++;
3529 }
3530
3531 return done;
3532}
3533
3534static int source_dispatch(sd_event_source *s) {
3535 _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
3536 EventSourceType saved_type;
3537 int r = 0;
3538
3539 assert(s);
3540 assert(s->pending || s->type == SOURCE_EXIT);
3541
3542 /* Save the event source type here, so that we still know it after the event callback, which might
3543 * invalidate the event source. */
3544 saved_type = s->type;
3545
3546 /* Similarly, store a reference to the event loop object, so that we can still access it after the
3547 * callback might have invalidated/disconnected the event source. */
3548 saved_event = sd_event_ref(s->event);
3549
3550 /* Check if we hit the ratelimit for this event source, and if so, let's disable it. */
3551 assert(!s->ratelimited);
3552 if (!ratelimit_below(&s->rate_limit)) {
3553 r = event_source_enter_ratelimited(s);
3554 if (r < 0)
3555 return r;
3556
3557 return 1;
3558 }
3559
3560 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3561 r = source_set_pending(s, false);
3562 if (r < 0)
3563 return r;
3564 }
3565
3566 if (s->type != SOURCE_POST) {
3567 sd_event_source *z;
3568
3569 /* If we execute a non-post source, let's mark all post sources as pending. */
3570
3571 SET_FOREACH(z, s->event->post_sources) {
3572 if (event_source_is_offline(z))
3573 continue;
3574
3575 r = source_set_pending(z, true);
3576 if (r < 0)
3577 return r;
3578 }
3579 }
3580
3581 if (s->enabled == SD_EVENT_ONESHOT) {
3582 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3583 if (r < 0)
3584 return r;
3585 }
3586
3587 s->dispatching = true;
3588
3589 switch (s->type) {
3590
3591 case SOURCE_IO:
3592 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3593 break;
3594
3595 case SOURCE_TIME_REALTIME:
3596 case SOURCE_TIME_BOOTTIME:
3597 case SOURCE_TIME_MONOTONIC:
3598 case SOURCE_TIME_REALTIME_ALARM:
3599 case SOURCE_TIME_BOOTTIME_ALARM:
3600 r = s->time.callback(s, s->time.next, s->userdata);
3601 break;
3602
3603 case SOURCE_SIGNAL:
3604 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3605 break;
3606
3607 case SOURCE_CHILD: {
3608 bool zombie;
3609
3610 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3611
3612 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3613
3614 /* Now, reap the PID for good. */
3615 if (zombie) {
3616 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3617 s->child.waited = true;
3618 }
3619
3620 break;
3621 }
3622
3623 case SOURCE_DEFER:
3624 r = s->defer.callback(s, s->userdata);
3625 break;
3626
3627 case SOURCE_POST:
3628 r = s->post.callback(s, s->userdata);
3629 break;
3630
3631 case SOURCE_EXIT:
3632 r = s->exit.callback(s, s->userdata);
3633 break;
3634
3635 case SOURCE_INOTIFY: {
3636 struct sd_event *e = s->event;
3637 struct inotify_data *d;
3638 size_t sz;
3639
3640 assert(s->inotify.inode_data);
3641 assert_se(d = s->inotify.inode_data->inotify_data);
3642
3643 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3644 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3645 assert(d->buffer_filled >= sz);
3646
3647 /* If the inotify callback destroys the event source then this likely means we don't need to
3648 * watch the inode anymore, and thus also won't need the inotify object anymore. But if we'd
3649 * free it immediately, then we couldn't drop the event from the inotify event queue without
3650 * memory corruption anymore, as below. Hence, let's not free it immediately, but mark it
3651 * "busy" with a counter (which will ensure it's not GC'ed away prematurely). Let's then
3652 * explicitly GC it after we are done dropping the inotify event from the buffer. */
3653 d->n_busy++;
3654 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3655 d->n_busy--;
3656
3657 /* When no event is pending anymore on this inotify object, then let's drop the event from
3658 * the inotify event queue buffer. */
3659 if (d->n_pending == 0)
3660 event_inotify_data_drop(e, d, sz);
3661
3662 /* Now we don't want to access 'd' anymore, it's OK to GC now. */
3663 event_gc_inotify_data(e, d);
3664 break;
3665 }
3666
3667 case SOURCE_WATCHDOG:
3668 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3669 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3670 assert_not_reached();
3671 }
3672
3673 s->dispatching = false;
3674
3675 if (r < 0) {
3676 log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
3677 strna(s->description),
3678 event_source_type_to_string(saved_type),
3679 s->exit_on_failure ? "exiting" : "disabling");
3680
3681 if (s->exit_on_failure)
3682 (void) sd_event_exit(saved_event, r);
3683 }
3684
3685 if (s->n_ref == 0)
3686 source_free(s);
3687 else if (r < 0)
3688 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
3689
3690 return 1;
3691}
3692
3693static int event_prepare(sd_event *e) {
3694 int r;
3695
3696 assert(e);
3697
3698 for (;;) {
3699 sd_event_source *s;
3700
3701 s = prioq_peek(e->prepare);
3702 if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
3703 break;
3704
3705 s->prepare_iteration = e->iteration;
3706 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3707 if (r < 0)
3708 return r;
3709
3710 assert(s->prepare);
3711
3712 s->dispatching = true;
3713 r = s->prepare(s, s->userdata);
3714 s->dispatching = false;
3715
3716 if (r < 0) {
3717 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
3718 strna(s->description),
3719 event_source_type_to_string(s->type),
3720 s->exit_on_failure ? "exiting" : "disabling");
3721
3722 if (s->exit_on_failure)
3723 (void) sd_event_exit(e, r);
3724 }
3725
3726 if (s->n_ref == 0)
3727 source_free(s);
3728 else if (r < 0)
3729 assert_se(sd_event_source_set_enabled(s, SD_EVENT_OFF) >= 0);
3730 }
3731
3732 return 0;
3733}
3734
3735static int dispatch_exit(sd_event *e) {
3736 sd_event_source *p;
3737 int r;
3738
3739 assert(e);
3740
3741 p = prioq_peek(e->exit);
3742 assert(!p || p->type == SOURCE_EXIT);
3743
3744 if (!p || event_source_is_offline(p)) {
3745 e->state = SD_EVENT_FINISHED;
3746 return 0;
3747 }
3748
3749 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3750 e->iteration++;
3751 e->state = SD_EVENT_EXITING;
3752 r = source_dispatch(p);
3753 e->state = SD_EVENT_INITIAL;
3754 return r;
3755}
3756
3757static sd_event_source* event_next_pending(sd_event *e) {
3758 sd_event_source *p;
3759
3760 assert(e);
3761
3762 p = prioq_peek(e->pending);
3763 if (!p)
3764 return NULL;
3765
3766 if (event_source_is_offline(p))
3767 return NULL;
3768
3769 return p;
3770}
3771
3772static int arm_watchdog(sd_event *e) {
3773 struct itimerspec its = {};
3774 usec_t t;
3775
3776 assert(e);
3777 assert(e->watchdog_fd >= 0);
3778
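 /* Ping somewhere between half and three quarters of the watchdog period, reusing sleep_between() so
  * that the wakeup coalesces with other timers on the system where possible. */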
3779 t = sleep_between(e,
3780 usec_add(e->watchdog_last, (e->watchdog_period / 2)),
3781 usec_add(e->watchdog_last, (e->watchdog_period * 3 / 4)));
3782
3783 timespec_store(&its.it_value, t);
3784
3785 /* Make sure we never set the watchdog to 0, which tells the
3786 * kernel to disable it. */
3787 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3788 its.it_value.tv_nsec = 1;
3789
3790 return RET_NERRNO(timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL));
3791}
3792
3793static int process_watchdog(sd_event *e) {
3794 assert(e);
3795
3796 if (!e->watchdog)
3797 return 0;
3798
3799 /* Don't notify watchdog too often */
3800 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3801 return 0;
3802
3803 sd_notify(false, "WATCHDOG=1");
3804 e->watchdog_last = e->timestamp.monotonic;
3805
3806 return arm_watchdog(e);
3807}
3808
3809static void event_close_inode_data_fds(sd_event *e) {
3810 struct inode_data *d;
3811
3812 assert(e);
3813
3814 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3815 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3816 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3817 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3818 * compromise. */
3819
3820 while ((d = e->inode_data_to_close)) {
3821 assert(d->fd >= 0);
3822 d->fd = safe_close(d->fd);
3823
3824 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3825 }
3826}
3827
3828_public_ int sd_event_prepare(sd_event *e) {
3829 int r;
3830
3831 assert_return(e, -EINVAL);
3832 assert_return(e = event_resolve(e), -ENOPKG);
3833 assert_return(!event_pid_changed(e), -ECHILD);
3834 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3835 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3836
3837 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3838 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3839 * syscalls. */
3840 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3841
3842 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
3843 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3844
3845 if (e->exit_requested)
3846 goto pending;
3847
3848 e->iteration++;
3849
3850 e->state = SD_EVENT_PREPARING;
3851 r = event_prepare(e);
3852 e->state = SD_EVENT_INITIAL;
3853 if (r < 0)
3854 return r;
3855
3856 r = event_arm_timer(e, &e->realtime);
3857 if (r < 0)
3858 return r;
3859
3860 r = event_arm_timer(e, &e->boottime);
3861 if (r < 0)
3862 return r;
3863
3864 r = event_arm_timer(e, &e->monotonic);
3865 if (r < 0)
3866 return r;
3867
3868 r = event_arm_timer(e, &e->realtime_alarm);
3869 if (r < 0)
3870 return r;
3871
3872 r = event_arm_timer(e, &e->boottime_alarm);
3873 if (r < 0)
3874 return r;
3875
3876 event_close_inode_data_fds(e);
3877
3878 if (event_next_pending(e) || e->need_process_child)
3879 goto pending;
3880
3881 e->state = SD_EVENT_ARMED;
3882
3883 return 0;
3884
3885pending:
3886 e->state = SD_EVENT_ARMED;
3887 r = sd_event_wait(e, 0);
3888 if (r == 0)
3889 e->state = SD_EVENT_ARMED;
3890
3891 return r;
3892}
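
/* sd_event_prepare(), sd_event_wait() and sd_event_dispatch() are the decomposed steps of sd_event_run()
 * below. A hand-rolled iteration (sketch, error handling elided) looks roughly like this:
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)
 *             r = sd_event_wait(e, UINT64_MAX);
 *     if (r > 0)
 *             r = sd_event_dispatch(e);
 *
 * which is useful when the fd returned by sd_event_get_fd() is embedded into a foreign event loop. */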
3893
3894static int epoll_wait_usec(
3895 int fd,
3896 struct epoll_event *events,
3897 int maxevents,
3898 usec_t timeout) {
3899
3900 int msec;
3901#if 0
3902 static bool epoll_pwait2_absent = false;
3903 int r;
3904
3905 /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not.
3906 *
3907 * FIXME: this is temporarily disabled until epoll_pwait2() becomes more widely available.
3908 * See https://github.com/systemd/systemd/pull/18973 and
3909 * https://github.com/systemd/systemd/issues/19052. */
3910
3911 if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
3912 r = epoll_pwait2(fd,
3913 events,
3914 maxevents,
3915 TIMESPEC_STORE(timeout),
3916 NULL);
3917 if (r >= 0)
3918 return r;
3919 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
3920 return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
3921 * supported. */
3922
3923 epoll_pwait2_absent = true;
3924 }
3925#endif
3926
3927 if (timeout == USEC_INFINITY)
3928 msec = -1;
3929 else {
3930 usec_t k;
3931
3932 k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
3933 if (k >= INT_MAX)
3934 msec = INT_MAX; /* Saturate */
3935 else
3936 msec = (int) k;
3937 }
3938
3939 return RET_NERRNO(epoll_wait(fd, events, maxevents, msec));
3940}
3941
3942static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
3943 size_t n_event_queue, m, n_event_max;
3944 int64_t min_priority = threshold;
3945 bool something_new = false;
3946 int r;
3947
3948 assert(e);
3949 assert(ret_min_priority);
3950
3951 n_event_queue = MAX(e->n_sources, 1u);
3952 if (!GREEDY_REALLOC(e->event_queue, n_event_queue))
3953 return -ENOMEM;
3954
3955 n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
3956
3957 /* If we still have inotify data buffered, then query the other fds, but don't block doing so */
3958 if (e->inotify_data_buffered)
3959 timeout = 0;
3960
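 /* If epoll_wait() fills the queue completely there may be more events pending than fit into it; in that
  * case grow the buffer and poll again without blocking, but stop growing once the queue has reached ten
  * times its initial size, so that memory use stays bounded. */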
3961 for (;;) {
3962 r = epoll_wait_usec(
3963 e->epoll_fd,
3964 e->event_queue,
3965 n_event_max,
3966 timeout);
3967 if (r < 0)
3968 return r;
3969
3970 m = (size_t) r;
3971
3972 if (m < n_event_max)
3973 break;
3974
3975 if (n_event_max >= n_event_queue * 10)
3976 break;
3977
3978 if (!GREEDY_REALLOC(e->event_queue, n_event_max + n_event_queue))
3979 return -ENOMEM;
3980
3981 n_event_max = MALLOC_ELEMENTSOF(e->event_queue);
3982 timeout = 0;
3983 }
3984
3985 /* Set the timestamp only when this is called for the first time. */
3986 if (threshold == INT64_MAX)
3987 triple_timestamp_get(&e->timestamp);
3988
3989 for (size_t i = 0; i < m; i++) {
3990
3991 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3992 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3993 else {
3994 WakeupType *t = e->event_queue[i].data.ptr;
3995
3996 switch (*t) {
3997
3998 case WAKEUP_EVENT_SOURCE: {
3999 sd_event_source *s = e->event_queue[i].data.ptr;
4000
4001 assert(s);
4002
4003 if (s->priority > threshold)
4004 continue;
4005
4006 min_priority = MIN(min_priority, s->priority);
4007
4008 switch (s->type) {
4009
4010 case SOURCE_IO:
4011 r = process_io(e, s, e->event_queue[i].events);
4012 break;
4013
4014 case SOURCE_CHILD:
4015 r = process_pidfd(e, s, e->event_queue[i].events);
4016 break;
4017
4018 default:
4019 assert_not_reached();
4020 }
4021
4022 break;
4023 }
4024
4025 case WAKEUP_CLOCK_DATA: {
4026 struct clock_data *d = e->event_queue[i].data.ptr;
4027
4028 assert(d);
4029
4030 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
4031 break;
4032 }
4033
4034 case WAKEUP_SIGNAL_DATA:
4035 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
4036 break;
4037
4038 case WAKEUP_INOTIFY_DATA:
4039 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
4040 break;
4041
4042 default:
4043 assert_not_reached();
4044 }
4045 }
4046 if (r < 0)
4047 return r;
4048 if (r > 0)
4049 something_new = true;
4050 }
4051
4052 *ret_min_priority = min_priority;
4053 return something_new;
4054}
4055
4056_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
4057 int r;
4058
4059 assert_return(e, -EINVAL);
4060 assert_return(e = event_resolve(e), -ENOPKG);
4061 assert_return(!event_pid_changed(e), -ECHILD);
4062 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4063 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
4064
4065 if (e->exit_requested) {
4066 e->state = SD_EVENT_PENDING;
4067 return 1;
4068 }
4069
4070 for (int64_t threshold = INT64_MAX; ; threshold--) {
4071 int64_t epoll_min_priority, child_min_priority;
4072
4073 /* There is a possibility that new epoll (especially IO) and child events are
4074 * triggered just after the process_epoll() call but before process_child(), and the new IO
4075 * events may have higher priority than the child events. To salvage these events,
4076 * let's call epoll_wait() again, but accept only events with higher priority than the
4077 * previous ones. See issue https://github.com/systemd/systemd/issues/18190 and comments
4078 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
4079 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
4080
4081 r = process_epoll(e, timeout, threshold, &epoll_min_priority);
4082 if (r == -EINTR) {
4083 e->state = SD_EVENT_PENDING;
4084 return 1;
4085 }
4086 if (r < 0)
4087 goto finish;
4088 if (r == 0 && threshold < INT64_MAX)
4089 /* No new epoll event. */
4090 break;
4091
4092 r = process_child(e, threshold, &child_min_priority);
4093 if (r < 0)
4094 goto finish;
4095 if (r == 0)
4096 /* No new child event. */
4097 break;
4098
4099 threshold = MIN(epoll_min_priority, child_min_priority);
4100 if (threshold == INT64_MIN)
4101 break;
4102
4103 timeout = 0;
4104 }
4105
4106 r = process_watchdog(e);
4107 if (r < 0)
4108 goto finish;
4109
4110 r = process_inotify(e);
4111 if (r < 0)
4112 goto finish;
4113
4114 r = process_timer(e, e->timestamp.realtime, &e->realtime);
4115 if (r < 0)
4116 goto finish;
4117
4118 r = process_timer(e, e->timestamp.boottime, &e->boottime);
4119 if (r < 0)
4120 goto finish;
4121
4122 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
4123 if (r < 0)
4124 goto finish;
4125
4126 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
4127 if (r < 0)
4128 goto finish;
4129
4130 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
4131 if (r < 0)
4132 goto finish;
4133 else if (r == 1) {
4134 /* A ratelimit expiry callback was called. Let's postpone processing pending sources and
4135 * put the loop in the initial state in order to also evaluate (in the next iteration) sources
4136 * that were potentially re-enabled by the callback.
4137 *
4138 * Wondering why we treat only this invocation of process_timer() differently? Once an event
4139 * source is ratelimited we essentially transform it into a CLOCK_MONOTONIC timer, hence the
4140 * ratelimit expiry callback is never called for any other timer type. */
4141 r = 0;
4142 goto finish;
4143 }
4144
4145 if (event_next_pending(e)) {
4146 e->state = SD_EVENT_PENDING;
4147 return 1;
4148 }
4149
4150 r = 0;
4151
4152finish:
4153 e->state = SD_EVENT_INITIAL;
4154
4155 return r;
4156}
4157
4158_public_ int sd_event_dispatch(sd_event *e) {
4159 sd_event_source *p;
4160 int r;
4161
4162 assert_return(e, -EINVAL);
4163 assert_return(e = event_resolve(e), -ENOPKG);
4164 assert_return(!event_pid_changed(e), -ECHILD);
4165 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4166 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
4167
4168 if (e->exit_requested)
4169 return dispatch_exit(e);
4170
4171 p = event_next_pending(e);
4172 if (p) {
4173 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
4174
4175 e->state = SD_EVENT_RUNNING;
4176 r = source_dispatch(p);
4177 e->state = SD_EVENT_INITIAL;
4178 return r;
4179 }
4180
4181 e->state = SD_EVENT_INITIAL;
4182
4183 return 1;
4184}
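
/* Illustrative sketch (not compiled as part of this file) of driving the loop manually with the
 * prepare/wait/dispatch triplet, which is roughly what sd_event_run() does internally; "e" is
 * assumed to be a fully set up event loop and "r" an int:
 *
 *     for (;;) {
 *             r = sd_event_prepare(e);             // > 0: something is already pending
 *             if (r == 0)                          // 0: nothing pending, we need to wait
 *                     r = sd_event_wait(e, UINT64_MAX);
 *             if (r < 0)
 *                     return r;
 *
 *             if (r > 0) {
 *                     r = sd_event_dispatch(e);    // runs at most one source's callback
 *                     if (r < 0)
 *                             return r;
 *             }
 *
 *             if (sd_event_get_state(e) == SD_EVENT_FINISHED)
 *                     break;                       // sd_event_exit() was requested
 *     }
 */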
4185
4186static void event_log_delays(sd_event *e) {
4187 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
4188 size_t l, i;
4189
4190 p = b;
4191 l = sizeof(b);
4192 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
4193 l = strpcpyf(&p, l, "%u ", e->delays[i]);
4194 e->delays[i] = 0;
4195 }
4196 log_debug("Event loop iterations: %s", b);
4197}
4198
4199_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
4200 int r;
4201
4202 assert_return(e, -EINVAL);
4203 assert_return(e = event_resolve(e), -ENOPKG);
4204 assert_return(!event_pid_changed(e), -ECHILD);
4205 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4206 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4207
4208 if (e->profile_delays && e->last_run_usec != 0) {
4209 usec_t this_run;
4210 unsigned l;
4211
4212 this_run = now(CLOCK_MONOTONIC);
4213
4214 l = log2u64(this_run - e->last_run_usec);
4215 assert(l < ELEMENTSOF(e->delays));
4216 e->delays[l]++;
4217
4218 if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
4219 event_log_delays(e);
4220 e->last_log_usec = this_run;
4221 }
4222 }
4223
4224 /* Make sure that none of the preparation callbacks ends up freeing the event loop under our feet */
4225 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
4226
4227 r = sd_event_prepare(e);
4228 if (r == 0)
4229 /* There was nothing? Then wait... */
4230 r = sd_event_wait(e, timeout);
4231
4232 if (e->profile_delays)
4233 e->last_run_usec = now(CLOCK_MONOTONIC);
4234
4235 if (r > 0) {
4236 /* There's something now, so let's dispatch it. */
4237 r = sd_event_dispatch(e);
4238 if (r < 0)
4239 return r;
4240
4241 return 1;
4242 }
4243
4244 return r;
4245}
4246
4247_public_ int sd_event_loop(sd_event *e) {
4248 int r;
4249
4250 assert_return(e, -EINVAL);
4251 assert_return(e = event_resolve(e), -ENOPKG);
4252 assert_return(!event_pid_changed(e), -ECHILD);
4253 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4254
4255 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
4256
4257 while (e->state != SD_EVENT_FINISHED) {
4258 r = sd_event_run(e, UINT64_MAX);
4259 if (r < 0)
4260 return r;
4261 }
4262
4263 return e->exit_code;
4264}
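
/* Illustrative sketch (not compiled here) of typical client code: acquire a loop, attach a
 * source, then hand control to sd_event_loop() until some callback requests termination via
 * sd_event_exit(). "some_fd" and on_io() are hypothetical and supplied by the caller:
 *
 *     _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_default(&e);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_add_io(e, NULL, some_fd, EPOLLIN, on_io, NULL);
 *     if (r < 0)
 *             return r;
 *
 *     return sd_event_loop(e);     // returns the code passed to sd_event_exit()
 */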
4265
4266_public_ int sd_event_get_fd(sd_event *e) {
4267 assert_return(e, -EINVAL);
4268 assert_return(e = event_resolve(e), -ENOPKG);
4269 assert_return(!event_pid_changed(e), -ECHILD);
4270
4271 return e->epoll_fd;
4272}
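
/* Illustrative sketch (not compiled here): the returned fd makes it possible to embed this event
 * loop into a foreign poll loop. Wait until the fd becomes readable, then run one non-blocking
 * iteration (assumes <poll.h>):
 *
 *     struct pollfd p = {
 *             .fd = sd_event_get_fd(e),
 *             .events = POLLIN,
 *     };
 *
 *     r = poll(&p, 1, -1);              // the foreign loop's own wait primitive
 *     if (r > 0)
 *             r = sd_event_run(e, 0);   // timeout 0: dispatch what's ready without blocking
 */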
4273
4274_public_ int sd_event_get_state(sd_event *e) {
4275 assert_return(e, -EINVAL);
4276 assert_return(e = event_resolve(e), -ENOPKG);
4277 assert_return(!event_pid_changed(e), -ECHILD);
4278
4279 return e->state;
4280}
4281
4282_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
4283 assert_return(e, -EINVAL);
4284 assert_return(e = event_resolve(e), -ENOPKG);
4285 assert_return(code, -EINVAL);
4286 assert_return(!event_pid_changed(e), -ECHILD);
4287
4288 if (!e->exit_requested)
4289 return -ENODATA;
4290
4291 *code = e->exit_code;
4292 return 0;
4293}
4294
4295_public_ int sd_event_exit(sd_event *e, int code) {
4296 assert_return(e, -EINVAL);
4297 assert_return(e = event_resolve(e), -ENOPKG);
4298 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4299 assert_return(!event_pid_changed(e), -ECHILD);
4300
4301 e->exit_requested = true;
4302 e->exit_code = code;
4303
4304 return 0;
4305}
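
/* Illustrative sketch (not compiled here) of a handler that requests clean termination of the
 * loop it is attached to; note that SIGTERM must be blocked before the signal source is added:
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     ...
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &ss, NULL);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 */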
4306
4307_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
4308 assert_return(e, -EINVAL);
4309 assert_return(e = event_resolve(e), -ENOPKG);
4310 assert_return(usec, -EINVAL);
4311 assert_return(!event_pid_changed(e), -ECHILD);
4312
4313 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
4314 return -EOPNOTSUPP;
4315
4316 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
4317 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, yet for
4318 * the purpose of getting the time this doesn't matter. */
4319 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
4320 return -EOPNOTSUPP;
4321
4322 if (!triple_timestamp_is_set(&e->timestamp)) {
4323 /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
4324 *usec = now(clock);
4325 return 1;
4326 }
4327
4328 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
4329 return 0;
4330}
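
/* Illustrative sketch (not compiled here): sd_event_now() is the preferred way to compute a
 * deadline relative to the loop's cached wakeup time, e.g. a one-shot timer firing five seconds
 * from "now"; on_timer() is a hypothetical caller-supplied handler:
 *
 *     uint64_t usec;
 *
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 */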
4331
4332_public_ int sd_event_default(sd_event **ret) {
4333 sd_event *e = NULL;
4334 int r;
4335
4336 if (!ret)
4337 return !!default_event;
4338
4339 if (default_event) {
4340 *ret = sd_event_ref(default_event);
4341 return 0;
4342 }
4343
4344 r = sd_event_new(&e);
4345 if (r < 0)
4346 return r;
4347
4348 e->default_event_ptr = &default_event;
4349 e->tid = gettid();
4350 default_event = e;
4351
4352 *ret = e;
4353 return 1;
4354}
4355
4356_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
4357 assert_return(e, -EINVAL);
4358 assert_return(e = event_resolve(e), -ENOPKG);
4359 assert_return(tid, -EINVAL);
4360 assert_return(!event_pid_changed(e), -ECHILD);
4361
4362 if (e->tid != 0) {
4363 *tid = e->tid;
4364 return 0;
4365 }
4366
4367 return -ENXIO;
4368}
4369
4370_public_ int sd_event_set_watchdog(sd_event *e, int b) {
4371 int r;
4372
4373 assert_return(e, -EINVAL);
4374 assert_return(e = event_resolve(e), -ENOPKG);
4375 assert_return(!event_pid_changed(e), -ECHILD);
4376
4377 if (e->watchdog == !!b)
4378 return e->watchdog;
4379
4380 if (b) {
4381 r = sd_watchdog_enabled(false, &e->watchdog_period);
4382 if (r <= 0)
4383 return r;
4384
4385 /* Issue first ping immediately */
4386 sd_notify(false, "WATCHDOG=1");
4387 e->watchdog_last = now(CLOCK_MONOTONIC);
4388
4389 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
4390 if (e->watchdog_fd < 0)
4391 return -errno;
4392
4393 r = arm_watchdog(e);
4394 if (r < 0)
4395 goto fail;
4396
4397 struct epoll_event ev = {
4398 .events = EPOLLIN,
4399 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
4400 };
4401
4402 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
4403 r = -errno;
4404 goto fail;
4405 }
4406
4407 } else {
4408 if (e->watchdog_fd >= 0) {
4409 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
4410 e->watchdog_fd = safe_close(e->watchdog_fd);
4411 }
4412 }
4413
4414 e->watchdog = !!b;
4415 return e->watchdog;
4416
4417fail:
4418 e->watchdog_fd = safe_close(e->watchdog_fd);
4419 return r;
4420}
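
/* Illustrative sketch (not compiled here): a service started with WatchdogSec= can delegate the
 * periodic WATCHDOG=1 keep-alive notifications entirely to the event loop:
 *
 *     r = sd_event_set_watchdog(e, true);   // > 0: watchdog is now handled by the loop,
 *     if (r < 0)                            // 0: $WATCHDOG_USEC is not set for this service
 *             return r;
 */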
4421
4422_public_ int sd_event_get_watchdog(sd_event *e) {
4423 assert_return(e, -EINVAL);
4424 assert_return(e = event_resolve(e), -ENOPKG);
4425 assert_return(!event_pid_changed(e), -ECHILD);
4426
4427 return e->watchdog;
4428}
4429
4430_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
4431 assert_return(e, -EINVAL);
4432 assert_return(e = event_resolve(e), -ENOPKG);
4433 assert_return(!event_pid_changed(e), -ECHILD);
4434
4435 *ret = e->iteration;
4436 return 0;
4437}
4438
4439_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
4440 assert_return(s, -EINVAL);
4441
4442 s->destroy_callback = callback;
4443 return 0;
4444}
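
/* Illustrative sketch (not compiled here): a destroy callback is the natural place to release
 * userdata that shall live exactly as long as the event source. Context, context_free(), on_io()
 * and "c" are hypothetical and supplied by the caller:
 *
 *     static void context_destroy(void *userdata) {
 *             context_free(userdata);
 *     }
 *
 *     r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, c);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_source_set_destroy_callback(s, context_destroy);
 */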
4445
4446_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
4447 assert_return(s, -EINVAL);
4448
4449 if (ret)
4450 *ret = s->destroy_callback;
4451
4452 return !!s->destroy_callback;
4453}
4454
4455_public_ int sd_event_source_get_floating(sd_event_source *s) {
4456 assert_return(s, -EINVAL);
4457
4458 return s->floating;
4459}
4460
4461_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
4462 assert_return(s, -EINVAL);
4463
4464 if (s->floating == !!b)
4465 return 0;
4466
4467 if (!s->event) /* Already disconnected */
4468 return -ESTALE;
4469
4470 s->floating = b;
4471
4472 if (b) {
4473 sd_event_source_ref(s);
4474 sd_event_unref(s->event);
4475 } else {
4476 sd_event_ref(s->event);
4477 sd_event_source_unref(s);
4478 }
4479
4480 return 1;
4481}
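
/* Illustrative sketch (not compiled here): marking a source as floating hands ownership over to
 * the event loop, so the caller may drop its own reference and the source stays alive until the
 * loop itself goes away. (Passing NULL as the source return pointer to the sd_event_add_*() calls
 * achieves the same thing implicitly.) on_defer() is a hypothetical handler:
 *
 *     r = sd_event_add_defer(e, &s, on_defer, NULL);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_source_set_floating(s, true);
 *     if (r < 0)
 *             return r;
 *
 *     s = sd_event_source_unref(s);   // the loop keeps the source pinned from now on
 */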
4482
4483_public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
4484 assert_return(s, -EINVAL);
4485 assert_return(s->type != SOURCE_EXIT, -EDOM);
4486
4487 return s->exit_on_failure;
4488}
4489
4490_public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
4491 assert_return(s, -EINVAL);
4492 assert_return(s->type != SOURCE_EXIT, -EDOM);
4493
4494 if (s->exit_on_failure == !!b)
4495 return 0;
4496
4497 s->exit_on_failure = b;
4498 return 1;
4499}
4500
4501_public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
4502 int r;
4503
4504 assert_return(s, -EINVAL);
4505
4506 /* Turning on ratelimiting on event source types that don't support it is a loggable offense, as doing
4507 * so is a programming error. */
4508 assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);
4509
4510 /* When ratelimiting is configured, we'll always reset the rate limit state first and start fresh,
4511 * non-ratelimited. */
4512 r = event_source_leave_ratelimit(s, /* run_callback */ false);
4513 if (r < 0)
4514 return r;
4515
4516 s->rate_limit = (RateLimit) { interval, burst };
4517 return 0;
4518}
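
/* Illustrative sketch (not compiled here): cap a busy source at 10 dispatches per second; once the
 * burst is exhausted the source is taken offline until the interval elapses, at which point the
 * optional expiry callback (see below) is invoked and the source is turned back online:
 *
 *     r = sd_event_source_set_ratelimit(s, USEC_PER_SEC, 10);
 *     if (r < 0)
 *             return r;
 */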
4519
4520_public_ int sd_event_source_set_ratelimit_expire_callback(sd_event_source *s, sd_event_handler_t callback) {
4521 assert_return(s, -EINVAL);
4522
4523 s->ratelimit_expire_callback = callback;
4524 return 0;
4525}
4526
4527_public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
4528 assert_return(s, -EINVAL);
4529
4530 /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
4531 * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
4532 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4533 return -EDOM;
4534
4535 if (!ratelimit_configured(&s->rate_limit))
4536 return -ENOEXEC;
4537
4538 if (ret_interval)
4539 *ret_interval = s->rate_limit.interval;
4540 if (ret_burst)
4541 *ret_burst = s->rate_limit.burst;
4542
4543 return 0;
4544}
4545
4546_public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
4547 assert_return(s, -EINVAL);
4548
4549 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4550 return false;
4551
4552 if (!ratelimit_configured(&s->rate_limit))
4553 return false;
4554
4555 return s->ratelimited;
4556}