src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
39
40 static bool event_source_is_online(sd_event_source *s) {
41 assert(s);
42 return s->enabled != SD_EVENT_OFF && !s->ratelimited;
43 }
44
45 static bool event_source_is_offline(sd_event_source *s) {
46 assert(s);
47 return s->enabled == SD_EVENT_OFF || s->ratelimited;
48 }
49
50 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
51 [SOURCE_IO] = "io",
52 [SOURCE_TIME_REALTIME] = "realtime",
53 [SOURCE_TIME_BOOTTIME] = "boottime",
54 [SOURCE_TIME_MONOTONIC] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
57 [SOURCE_SIGNAL] = "signal",
58 [SOURCE_CHILD] = "child",
59 [SOURCE_DEFER] = "defer",
60 [SOURCE_POST] = "post",
61 [SOURCE_EXIT] = "exit",
62 [SOURCE_WATCHDOG] = "watchdog",
63 [SOURCE_INOTIFY] = "inotify",
64 };
65
66 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
67
68 #define EVENT_SOURCE_IS_TIME(t) \
69 IN_SET((t), \
70 SOURCE_TIME_REALTIME, \
71 SOURCE_TIME_BOOTTIME, \
72 SOURCE_TIME_MONOTONIC, \
73 SOURCE_TIME_REALTIME_ALARM, \
74 SOURCE_TIME_BOOTTIME_ALARM)
75
76 #define EVENT_SOURCE_CAN_RATE_LIMIT(t) \
77 IN_SET((t), \
78 SOURCE_IO, \
79 SOURCE_TIME_REALTIME, \
80 SOURCE_TIME_BOOTTIME, \
81 SOURCE_TIME_MONOTONIC, \
82 SOURCE_TIME_REALTIME_ALARM, \
83 SOURCE_TIME_BOOTTIME_ALARM, \
84 SOURCE_SIGNAL, \
85 SOURCE_DEFER, \
86 SOURCE_INOTIFY)
87
88 struct sd_event {
89 unsigned n_ref;
90
91 int epoll_fd;
92 int watchdog_fd;
93
94 Prioq *pending;
95 Prioq *prepare;
96
97 /* timerfd_create() only supports these five clocks so far. We
98 * can add support for more clocks when the kernel learns to
99 * deal with them, too. */
100 struct clock_data realtime;
101 struct clock_data boottime;
102 struct clock_data monotonic;
103 struct clock_data realtime_alarm;
104 struct clock_data boottime_alarm;
105
106 usec_t perturb;
107
108 sd_event_source **signal_sources; /* indexed by signal number */
109 Hashmap *signal_data; /* indexed by priority */
110
111 Hashmap *child_sources;
112 unsigned n_online_child_sources;
113
114 Set *post_sources;
115
116 Prioq *exit;
117
118 Hashmap *inotify_data; /* indexed by priority */
119
120 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
121 LIST_HEAD(struct inode_data, inode_data_to_close);
122
123 /* A list of inotify objects that already have events buffered which aren't processed yet */
124 LIST_HEAD(struct inotify_data, inotify_data_buffered);
125
126 pid_t original_pid;
127
128 uint64_t iteration;
129 triple_timestamp timestamp;
130 int state;
131
132 bool exit_requested:1;
133 bool need_process_child:1;
134 bool watchdog:1;
135 bool profile_delays:1;
136
137 int exit_code;
138
139 pid_t tid;
140 sd_event **default_event_ptr;
141
142 usec_t watchdog_last, watchdog_period;
143
144 unsigned n_sources;
145
146 struct epoll_event *event_queue;
147 size_t event_queue_allocated;
148
149 LIST_HEAD(sd_event_source, sources);
150
151 usec_t last_run_usec, last_log_usec;
152 unsigned delays[sizeof(usec_t) * 8];
153 };
154
155 static thread_local sd_event *default_event = NULL;
156
157 static void source_disconnect(sd_event_source *s);
158 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
159
160 static sd_event *event_resolve(sd_event *e) {
161 return e == SD_EVENT_DEFAULT ? default_event : e;
162 }
163
164 static int pending_prioq_compare(const void *a, const void *b) {
165 const sd_event_source *x = a, *y = b;
166 int r;
167
168 assert(x->pending);
169 assert(y->pending);
170
171 /* Enabled ones first */
172 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
173 return -1;
174 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
175 return 1;
176
177 /* Non rate-limited ones first. */
178 r = CMP(!!x->ratelimited, !!y->ratelimited);
179 if (r != 0)
180 return r;
181
182 /* Lower priority values first */
183 r = CMP(x->priority, y->priority);
184 if (r != 0)
185 return r;
186
187 /* Older entries first */
188 return CMP(x->pending_iteration, y->pending_iteration);
189 }
190
191 static int prepare_prioq_compare(const void *a, const void *b) {
192 const sd_event_source *x = a, *y = b;
193 int r;
194
195 assert(x->prepare);
196 assert(y->prepare);
197
198 /* Enabled ones first */
199 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
200 return -1;
201 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
202 return 1;
203
204 /* Non rate-limited ones first. */
205 r = CMP(!!x->ratelimited, !!y->ratelimited);
206 if (r != 0)
207 return r;
208
209 /* Move most recently prepared ones last, so that we can stop
210 * preparing as soon as we hit one that has already been
211 * prepared in the current iteration */
212 r = CMP(x->prepare_iteration, y->prepare_iteration);
213 if (r != 0)
214 return r;
215
216 /* Lower priority values first */
217 return CMP(x->priority, y->priority);
218 }
219
220 static usec_t time_event_source_next(const sd_event_source *s) {
221 assert(s);
222
223 /* We have two kinds of event sources that have expiration times associated with them: the actual
224 * time-based ones and the ones for which a ratelimit can be in effect (where we want to be notified
225 * once the ratelimit time window ends). Let's return the next expiration time depending on what we
226 * are looking at here. */
227
228 if (s->ratelimited) { /* If rate-limited, the next expiration is when the ratelimit time window ends */
229 assert(s->rate_limit.begin != 0);
230 assert(s->rate_limit.interval != 0);
231 return usec_add(s->rate_limit.begin, s->rate_limit.interval);
232 }
233
234 /* Otherwise this must be a time event source, if not ratelimited */
235 if (EVENT_SOURCE_IS_TIME(s->type))
236 return s->time.next;
237
238 return USEC_INFINITY;
239 }
240
241 static int earliest_time_prioq_compare(const void *a, const void *b) {
242 const sd_event_source *x = a, *y = b;
243
244 /* Enabled ones first */
245 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
246 return -1;
247 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
248 return 1;
249
250 /* Move the pending ones to the end */
251 if (!x->pending && y->pending)
252 return -1;
253 if (x->pending && !y->pending)
254 return 1;
255
256 /* Order by time */
257 return CMP(time_event_source_next(x), time_event_source_next(y));
258 }
259
260 static usec_t time_event_source_latest(const sd_event_source *s) {
261 assert(s);
262
263 if (s->ratelimited) { /* For ratelimited event sources the earliest and the latest time shall actually
264 * be the same, as we should avoid stacking additional inaccuracy on top of a
265 * time window that is itself an accuracy compromise */
266 assert(s->rate_limit.begin != 0);
267 assert(s->rate_limit.interval != 0);
268 return usec_add(s->rate_limit.begin, s->rate_limit.interval);
269 }
270
271 /* Must be a time event source, if not ratelimited */
272 if (EVENT_SOURCE_IS_TIME(s->type))
273 return usec_add(s->time.next, s->time.accuracy);
274
275 return USEC_INFINITY;
276 }
277
278 static int latest_time_prioq_compare(const void *a, const void *b) {
279 const sd_event_source *x = a, *y = b;
280
281 /* Enabled ones first */
282 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
283 return -1;
284 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
285 return 1;
286
287 /* Move the pending ones to the end */
288 if (!x->pending && y->pending)
289 return -1;
290 if (x->pending && !y->pending)
291 return 1;
292
293 /* Order by time */
294 return CMP(time_event_source_latest(x), time_event_source_latest(y));
295 }
296
297 static int exit_prioq_compare(const void *a, const void *b) {
298 const sd_event_source *x = a, *y = b;
299
300 assert(x->type == SOURCE_EXIT);
301 assert(y->type == SOURCE_EXIT);
302
303 /* Enabled ones first */
304 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
305 return -1;
306 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
307 return 1;
308
309 /* Lower priority values first */
310 return CMP(x->priority, y->priority);
311 }
312
313 static void free_clock_data(struct clock_data *d) {
314 assert(d);
315 assert(d->wakeup == WAKEUP_CLOCK_DATA);
316
317 safe_close(d->fd);
318 prioq_free(d->earliest);
319 prioq_free(d->latest);
320 }
321
322 static sd_event *event_free(sd_event *e) {
323 sd_event_source *s;
324
325 assert(e);
326
327 while ((s = e->sources)) {
328 assert(s->floating);
329 source_disconnect(s);
330 sd_event_source_unref(s);
331 }
332
333 assert(e->n_sources == 0);
334
335 if (e->default_event_ptr)
336 *(e->default_event_ptr) = NULL;
337
338 safe_close(e->epoll_fd);
339 safe_close(e->watchdog_fd);
340
341 free_clock_data(&e->realtime);
342 free_clock_data(&e->boottime);
343 free_clock_data(&e->monotonic);
344 free_clock_data(&e->realtime_alarm);
345 free_clock_data(&e->boottime_alarm);
346
347 prioq_free(e->pending);
348 prioq_free(e->prepare);
349 prioq_free(e->exit);
350
351 free(e->signal_sources);
352 hashmap_free(e->signal_data);
353
354 hashmap_free(e->inotify_data);
355
356 hashmap_free(e->child_sources);
357 set_free(e->post_sources);
358
359 free(e->event_queue);
360
361 return mfree(e);
362 }
363
364 _public_ int sd_event_new(sd_event** ret) {
365 sd_event *e;
366 int r;
367
368 assert_return(ret, -EINVAL);
369
370 e = new(sd_event, 1);
371 if (!e)
372 return -ENOMEM;
373
374 *e = (sd_event) {
375 .n_ref = 1,
376 .epoll_fd = -1,
377 .watchdog_fd = -1,
378 .realtime.wakeup = WAKEUP_CLOCK_DATA,
379 .realtime.fd = -1,
380 .realtime.next = USEC_INFINITY,
381 .boottime.wakeup = WAKEUP_CLOCK_DATA,
382 .boottime.fd = -1,
383 .boottime.next = USEC_INFINITY,
384 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
385 .monotonic.fd = -1,
386 .monotonic.next = USEC_INFINITY,
387 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
388 .realtime_alarm.fd = -1,
389 .realtime_alarm.next = USEC_INFINITY,
390 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
391 .boottime_alarm.fd = -1,
392 .boottime_alarm.next = USEC_INFINITY,
393 .perturb = USEC_INFINITY,
394 .original_pid = getpid_cached(),
395 };
396
397 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
398 if (r < 0)
399 goto fail;
400
401 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
402 if (e->epoll_fd < 0) {
403 r = -errno;
404 goto fail;
405 }
406
407 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
408
409 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
410 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 … 2^63 us will be logged every 5s.");
411 e->profile_delays = true;
412 }
413
414 *ret = e;
415 return 0;
416
417 fail:
418 event_free(e);
419 return r;
420 }
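
/* Usage sketch (editor's illustration, not part of this file): allocating an event loop with
 * sd_event_new() and running it until something calls sd_event_exit(). Error handling is
 * abbreviated and the function shown here is hypothetical.
 *
 *     int run_loop(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             // ... attach event sources to 'e' here ...
 *
 *             r = sd_event_loop(e);      // dispatches events until sd_event_exit() is called
 *             sd_event_unref(e);
 *             return r;
 *     }
 */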
421
422 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
423
424 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
425 if (s)
426 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
427 return sd_event_source_unref(s);
428 }
429
430 static bool event_pid_changed(sd_event *e) {
431 assert(e);
432
433 /* We don't support people creating an event loop and keeping
434 * it around over a fork(). Let's complain. */
435
436 return e->original_pid != getpid_cached();
437 }
438
439 static void source_io_unregister(sd_event_source *s) {
440 assert(s);
441 assert(s->type == SOURCE_IO);
442
443 if (event_pid_changed(s->event))
444 return;
445
446 if (!s->io.registered)
447 return;
448
449 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
450 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
451 strna(s->description), event_source_type_to_string(s->type));
452
453 s->io.registered = false;
454 }
455
456 static int source_io_register(
457 sd_event_source *s,
458 int enabled,
459 uint32_t events) {
460
461 assert(s);
462 assert(s->type == SOURCE_IO);
463 assert(enabled != SD_EVENT_OFF);
464
465 struct epoll_event ev = {
466 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
467 .data.ptr = s,
468 };
469
470 if (epoll_ctl(s->event->epoll_fd,
471 s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
472 s->io.fd, &ev) < 0)
473 return -errno;
474
475 s->io.registered = true;
476
477 return 0;
478 }
479
480 static void source_child_pidfd_unregister(sd_event_source *s) {
481 assert(s);
482 assert(s->type == SOURCE_CHILD);
483
484 if (event_pid_changed(s->event))
485 return;
486
487 if (!s->child.registered)
488 return;
489
490 if (EVENT_SOURCE_WATCH_PIDFD(s))
491 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
492 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll, ignoring: %m",
493 strna(s->description), event_source_type_to_string(s->type));
494
495 s->child.registered = false;
496 }
497
498 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
499 assert(s);
500 assert(s->type == SOURCE_CHILD);
501 assert(enabled != SD_EVENT_OFF);
502
503 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
504 struct epoll_event ev = {
505 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
506 .data.ptr = s,
507 };
508
509 if (epoll_ctl(s->event->epoll_fd,
510 s->child.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
511 s->child.pidfd, &ev) < 0)
512 return -errno;
513 }
514
515 s->child.registered = true;
516 return 0;
517 }
518
519 static clockid_t event_source_type_to_clock(EventSourceType t) {
520
521 switch (t) {
522
523 case SOURCE_TIME_REALTIME:
524 return CLOCK_REALTIME;
525
526 case SOURCE_TIME_BOOTTIME:
527 return CLOCK_BOOTTIME;
528
529 case SOURCE_TIME_MONOTONIC:
530 return CLOCK_MONOTONIC;
531
532 case SOURCE_TIME_REALTIME_ALARM:
533 return CLOCK_REALTIME_ALARM;
534
535 case SOURCE_TIME_BOOTTIME_ALARM:
536 return CLOCK_BOOTTIME_ALARM;
537
538 default:
539 return (clockid_t) -1;
540 }
541 }
542
543 static EventSourceType clock_to_event_source_type(clockid_t clock) {
544
545 switch (clock) {
546
547 case CLOCK_REALTIME:
548 return SOURCE_TIME_REALTIME;
549
550 case CLOCK_BOOTTIME:
551 return SOURCE_TIME_BOOTTIME;
552
553 case CLOCK_MONOTONIC:
554 return SOURCE_TIME_MONOTONIC;
555
556 case CLOCK_REALTIME_ALARM:
557 return SOURCE_TIME_REALTIME_ALARM;
558
559 case CLOCK_BOOTTIME_ALARM:
560 return SOURCE_TIME_BOOTTIME_ALARM;
561
562 default:
563 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
564 }
565 }
566
567 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
568 assert(e);
569
570 switch (t) {
571
572 case SOURCE_TIME_REALTIME:
573 return &e->realtime;
574
575 case SOURCE_TIME_BOOTTIME:
576 return &e->boottime;
577
578 case SOURCE_TIME_MONOTONIC:
579 return &e->monotonic;
580
581 case SOURCE_TIME_REALTIME_ALARM:
582 return &e->realtime_alarm;
583
584 case SOURCE_TIME_BOOTTIME_ALARM:
585 return &e->boottime_alarm;
586
587 default:
588 return NULL;
589 }
590 }
591
592 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
593 assert(e);
594
595 if (!d)
596 return;
597
598 hashmap_remove(e->signal_data, &d->priority);
599 safe_close(d->fd);
600 free(d);
601 }
602
603 static int event_make_signal_data(
604 sd_event *e,
605 int sig,
606 struct signal_data **ret) {
607
608 struct signal_data *d;
609 bool added = false;
610 sigset_t ss_copy;
611 int64_t priority;
612 int r;
613
614 assert(e);
615
616 if (event_pid_changed(e))
617 return -ECHILD;
618
619 if (e->signal_sources && e->signal_sources[sig])
620 priority = e->signal_sources[sig]->priority;
621 else
622 priority = SD_EVENT_PRIORITY_NORMAL;
623
624 d = hashmap_get(e->signal_data, &priority);
625 if (d) {
626 if (sigismember(&d->sigset, sig) > 0) {
627 if (ret)
628 *ret = d;
629 return 0;
630 }
631 } else {
632 d = new(struct signal_data, 1);
633 if (!d)
634 return -ENOMEM;
635
636 *d = (struct signal_data) {
637 .wakeup = WAKEUP_SIGNAL_DATA,
638 .fd = -1,
639 .priority = priority,
640 };
641
642 r = hashmap_ensure_put(&e->signal_data, &uint64_hash_ops, &d->priority, d);
643 if (r < 0) {
644 free(d);
645 return r;
646 }
647
648 added = true;
649 }
650
651 ss_copy = d->sigset;
652 assert_se(sigaddset(&ss_copy, sig) >= 0);
653
654 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
655 if (r < 0) {
656 r = -errno;
657 goto fail;
658 }
659
660 d->sigset = ss_copy;
661
662 if (d->fd >= 0) {
663 if (ret)
664 *ret = d;
665 return 0;
666 }
667
668 d->fd = fd_move_above_stdio(r);
669
670 struct epoll_event ev = {
671 .events = EPOLLIN,
672 .data.ptr = d,
673 };
674
675 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
676 r = -errno;
677 goto fail;
678 }
679
680 if (ret)
681 *ret = d;
682
683 return 0;
684
685 fail:
686 if (added)
687 event_free_signal_data(e, d);
688
689 return r;
690 }
691
692 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
693 assert(e);
694 assert(d);
695
696 /* Turns off the specified signal in the signal data
697 * object. If the signal mask of the object becomes empty
698 * that way, the object is removed altogether. */
699
700 if (sigismember(&d->sigset, sig) == 0)
701 return;
702
703 assert_se(sigdelset(&d->sigset, sig) >= 0);
704
705 if (sigisemptyset(&d->sigset)) {
706 /* If the mask is now empty we can get rid of the structure */
707 event_free_signal_data(e, d);
708 return;
709 }
710
711 assert(d->fd >= 0);
712
713 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
714 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
715 }
716
717 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
718 struct signal_data *d;
719 static const int64_t zero_priority = 0;
720
721 assert(e);
722
723 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
724 * and possibly drop the signalfd for it. */
725
726 if (sig == SIGCHLD &&
727 e->n_online_child_sources > 0)
728 return;
729
730 if (e->signal_sources &&
731 e->signal_sources[sig] &&
732 event_source_is_online(e->signal_sources[sig]))
733 return;
734
735 /*
736 * The specified signal might be enabled in three different queues:
737 *
738 * 1) the one that belongs to the priority passed (if it is non-NULL)
739 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
740 * 3) the 0 priority (to cover the SIGCHLD case)
741 *
742 * Hence, let's remove it from all three here.
743 */
744
745 if (priority) {
746 d = hashmap_get(e->signal_data, priority);
747 if (d)
748 event_unmask_signal_data(e, d, sig);
749 }
750
751 if (e->signal_sources && e->signal_sources[sig]) {
752 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
753 if (d)
754 event_unmask_signal_data(e, d, sig);
755 }
756
757 d = hashmap_get(e->signal_data, &zero_priority);
758 if (d)
759 event_unmask_signal_data(e, d, sig);
760 }
761
762 static void event_source_pp_prioq_reshuffle(sd_event_source *s) {
763 assert(s);
764
765 /* Reshuffles the pending + prepare prioqs. Called whenever the dispatch order changes, i.e. when
766 * they are enabled/disabled or marked pending and such. */
767
768 if (s->pending)
769 prioq_reshuffle(s->event->pending, s, &s->pending_index);
770
771 if (s->prepare)
772 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
773 }
774
775 static void event_source_time_prioq_reshuffle(sd_event_source *s) {
776 struct clock_data *d;
777
778 assert(s);
779
780 /* Called whenever the event source's timer ordering properties change, i.e. time, accuracy,
781 * pending, enable state. Makes sure the two prioqs are ordered properly again. */
782
783 if (s->ratelimited)
784 d = &s->event->monotonic;
785 else {
786 assert(EVENT_SOURCE_IS_TIME(s->type));
787 assert_se(d = event_get_clock_data(s->event, s->type));
788 }
789
790 prioq_reshuffle(d->earliest, s, &s->earliest_index);
791 prioq_reshuffle(d->latest, s, &s->latest_index);
792 d->needs_rearm = true;
793 }
794
795 static void event_source_time_prioq_remove(
796 sd_event_source *s,
797 struct clock_data *d) {
798
799 assert(s);
800 assert(d);
801
802 prioq_remove(d->earliest, s, &s->earliest_index);
803 prioq_remove(d->latest, s, &s->latest_index);
804 s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
805 d->needs_rearm = true;
806 }
807
808 static void source_disconnect(sd_event_source *s) {
809 sd_event *event;
810
811 assert(s);
812
813 if (!s->event)
814 return;
815
816 assert(s->event->n_sources > 0);
817
818 switch (s->type) {
819
820 case SOURCE_IO:
821 if (s->io.fd >= 0)
822 source_io_unregister(s);
823
824 break;
825
826 case SOURCE_TIME_REALTIME:
827 case SOURCE_TIME_BOOTTIME:
828 case SOURCE_TIME_MONOTONIC:
829 case SOURCE_TIME_REALTIME_ALARM:
830 case SOURCE_TIME_BOOTTIME_ALARM:
831 /* Only remove this event source from the time prioqs here if it is not ratelimited. If
832 * it is ratelimited, we'll remove it below, separately. Why? Because the clock used might
833 * differ: ratelimiting always uses CLOCK_MONOTONIC, but timer events might use any clock. */
834
835 if (!s->ratelimited) {
836 struct clock_data *d;
837 assert_se(d = event_get_clock_data(s->event, s->type));
838 event_source_time_prioq_remove(s, d);
839 }
840
841 break;
842
843 case SOURCE_SIGNAL:
844 if (s->signal.sig > 0) {
845
846 if (s->event->signal_sources)
847 s->event->signal_sources[s->signal.sig] = NULL;
848
849 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
850 }
851
852 break;
853
854 case SOURCE_CHILD:
855 if (s->child.pid > 0) {
856 if (event_source_is_online(s)) {
857 assert(s->event->n_online_child_sources > 0);
858 s->event->n_online_child_sources--;
859 }
860
861 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
862 }
863
864 if (EVENT_SOURCE_WATCH_PIDFD(s))
865 source_child_pidfd_unregister(s);
866 else
867 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
868
869 break;
870
871 case SOURCE_DEFER:
872 /* nothing */
873 break;
874
875 case SOURCE_POST:
876 set_remove(s->event->post_sources, s);
877 break;
878
879 case SOURCE_EXIT:
880 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
881 break;
882
883 case SOURCE_INOTIFY: {
884 struct inode_data *inode_data;
885
886 inode_data = s->inotify.inode_data;
887 if (inode_data) {
888 struct inotify_data *inotify_data;
889 assert_se(inotify_data = inode_data->inotify_data);
890
891 /* Detach this event source from the inode object */
892 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
893 s->inotify.inode_data = NULL;
894
895 if (s->pending) {
896 assert(inotify_data->n_pending > 0);
897 inotify_data->n_pending--;
898 }
899
900 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
901 * continues to be watched. That's because inotify doesn't really have an API for that: we
902 * can only change watch masks with access to the original inode either by fd or by path. But
903 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
904 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
905 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
906 * there), but given the need for open_by_handle_at() which is privileged and not universally
907 * available this would be quite an incomplete solution. Hence we go the other way, leave the
908 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
909 * anymore after reception. Yes, this sucks, but … Linux … */
910
911 /* Maybe release the inode data (and its inotify) */
912 event_gc_inode_data(s->event, inode_data);
913 }
914
915 break;
916 }
917
918 default:
919 assert_not_reached("Wut? I shouldn't exist.");
920 }
921
922 if (s->pending)
923 prioq_remove(s->event->pending, s, &s->pending_index);
924
925 if (s->prepare)
926 prioq_remove(s->event->prepare, s, &s->prepare_index);
927
928 if (s->ratelimited)
929 event_source_time_prioq_remove(s, &s->event->monotonic);
930
931 event = TAKE_PTR(s->event);
932 LIST_REMOVE(sources, event->sources, s);
933 event->n_sources--;
934
935 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
936 * pidfd associated with this event source, which we'll do only on source_free(). */
937
938 if (!s->floating)
939 sd_event_unref(event);
940 }
941
942 static sd_event_source* source_free(sd_event_source *s) {
943 assert(s);
944
945 source_disconnect(s);
946
947 if (s->type == SOURCE_IO && s->io.owned)
948 s->io.fd = safe_close(s->io.fd);
949
950 if (s->type == SOURCE_CHILD) {
951 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
952
953 if (s->child.process_owned) {
954
955 if (!s->child.exited) {
956 bool sent = false;
957
958 if (s->child.pidfd >= 0) {
959 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
960 if (errno == ESRCH) /* Already dead */
961 sent = true;
962 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
963 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
964 s->child.pid);
965 } else
966 sent = true;
967 }
968
969 if (!sent)
970 if (kill(s->child.pid, SIGKILL) < 0)
971 if (errno != ESRCH) /* Already dead */
972 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
973 s->child.pid);
974 }
975
976 if (!s->child.waited) {
977 siginfo_t si = {};
978
979 /* Reap the child if we can */
980 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
981 }
982 }
983
984 if (s->child.pidfd_owned)
985 s->child.pidfd = safe_close(s->child.pidfd);
986 }
987
988 if (s->destroy_callback)
989 s->destroy_callback(s->userdata);
990
991 free(s->description);
992 return mfree(s);
993 }
994 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
995
996 static int source_set_pending(sd_event_source *s, bool b) {
997 int r;
998
999 assert(s);
1000 assert(s->type != SOURCE_EXIT);
1001
1002 if (s->pending == b)
1003 return 0;
1004
1005 s->pending = b;
1006
1007 if (b) {
1008 s->pending_iteration = s->event->iteration;
1009
1010 r = prioq_put(s->event->pending, s, &s->pending_index);
1011 if (r < 0) {
1012 s->pending = false;
1013 return r;
1014 }
1015 } else
1016 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1017
1018 if (EVENT_SOURCE_IS_TIME(s->type))
1019 event_source_time_prioq_reshuffle(s);
1020
1021 if (s->type == SOURCE_SIGNAL && !b) {
1022 struct signal_data *d;
1023
1024 d = hashmap_get(s->event->signal_data, &s->priority);
1025 if (d && d->current == s)
1026 d->current = NULL;
1027 }
1028
1029 if (s->type == SOURCE_INOTIFY) {
1030
1031 assert(s->inotify.inode_data);
1032 assert(s->inotify.inode_data->inotify_data);
1033
1034 if (b)
1035 s->inotify.inode_data->inotify_data->n_pending++;
1036 else {
1037 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1038 s->inotify.inode_data->inotify_data->n_pending--;
1039 }
1040 }
1041
1042 return 1;
1043 }
1044
1045 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1046 sd_event_source *s;
1047
1048 assert(e);
1049
1050 s = new(sd_event_source, 1);
1051 if (!s)
1052 return NULL;
1053
1054 *s = (struct sd_event_source) {
1055 .n_ref = 1,
1056 .event = e,
1057 .floating = floating,
1058 .type = type,
1059 .pending_index = PRIOQ_IDX_NULL,
1060 .prepare_index = PRIOQ_IDX_NULL,
1061 };
1062
1063 if (!floating)
1064 sd_event_ref(e);
1065
1066 LIST_PREPEND(sources, e->sources, s);
1067 e->n_sources++;
1068
1069 return s;
1070 }
1071
1072 static int io_exit_callback(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
1073 assert(s);
1074
1075 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1076 }
1077
1078 _public_ int sd_event_add_io(
1079 sd_event *e,
1080 sd_event_source **ret,
1081 int fd,
1082 uint32_t events,
1083 sd_event_io_handler_t callback,
1084 void *userdata) {
1085
1086 _cleanup_(source_freep) sd_event_source *s = NULL;
1087 int r;
1088
1089 assert_return(e, -EINVAL);
1090 assert_return(e = event_resolve(e), -ENOPKG);
1091 assert_return(fd >= 0, -EBADF);
1092 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1093 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1094 assert_return(!event_pid_changed(e), -ECHILD);
1095
1096 if (!callback)
1097 callback = io_exit_callback;
1098
1099 s = source_new(e, !ret, SOURCE_IO);
1100 if (!s)
1101 return -ENOMEM;
1102
1103 s->wakeup = WAKEUP_EVENT_SOURCE;
1104 s->io.fd = fd;
1105 s->io.events = events;
1106 s->io.callback = callback;
1107 s->userdata = userdata;
1108 s->enabled = SD_EVENT_ON;
1109
1110 r = source_io_register(s, s->enabled, events);
1111 if (r < 0)
1112 return r;
1113
1114 if (ret)
1115 *ret = s;
1116 TAKE_PTR(s);
1117
1118 return 0;
1119 }
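
/* Usage sketch (illustrative only, not part of this file): registering a readable fd with
 * sd_event_add_io(). The handler signature matches sd_event_io_handler_t as used above;
 * "on_io" and "my_fd" are hypothetical names.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     char buf[256];
 *                     (void) read(fd, buf, sizeof(buf));
 *             }
 *             return 0;
 *     }
 *
 *     ...
 *     sd_event_source *src = NULL;
 *     r = sd_event_add_io(e, &src, my_fd, EPOLLIN, on_io, NULL);
 *     if (r < 0)
 *             return r;
 */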
1120
1121 static void initialize_perturb(sd_event *e) {
1122 sd_id128_t bootid = {};
1123
1124 /* When we sleep for longer, we try to realign the wakeup to
1125 the same time within each minute/second/250ms, so that
1126 events all across the system can be coalesced into a single
1127 CPU wakeup. However, let's take some system-specific
1128 randomness for this value, so that in a network of systems
1129 with synced clocks timer events are distributed a
1130 bit. Here, we calculate a perturbation usec offset from the
1131 boot ID. */
1132
1133 if (_likely_(e->perturb != USEC_INFINITY))
1134 return;
1135
1136 if (sd_id128_get_boot(&bootid) >= 0)
1137 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1138 }
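
/* Illustration (simplified, not the actual timer-arming code of this file): the per-boot
 * perturbation computed above can be used to shift coalesced wakeups consistently on one
 * machine while still differing between machines. A sketch of the idea, assuming second
 * granularity:
 *
 *     usec_t coalesce(usec_t next, usec_t perturb) {
 *             usec_t c = (next / USEC_PER_SEC) * USEC_PER_SEC + (perturb % USEC_PER_SEC);
 *             return c < next ? c + USEC_PER_SEC : c;   // never earlier than requested
 *     }
 */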
1139
1140 static int event_setup_timer_fd(
1141 sd_event *e,
1142 struct clock_data *d,
1143 clockid_t clock) {
1144
1145 assert(e);
1146 assert(d);
1147
1148 if (_likely_(d->fd >= 0))
1149 return 0;
1150
1151 _cleanup_close_ int fd = -1;
1152
1153 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1154 if (fd < 0)
1155 return -errno;
1156
1157 fd = fd_move_above_stdio(fd);
1158
1159 struct epoll_event ev = {
1160 .events = EPOLLIN,
1161 .data.ptr = d,
1162 };
1163
1164 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev) < 0)
1165 return -errno;
1166
1167 d->fd = TAKE_FD(fd);
1168 return 0;
1169 }
1170
1171 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1172 assert(s);
1173
1174 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1175 }
1176
1177 static int setup_clock_data(sd_event *e, struct clock_data *d, clockid_t clock) {
1178 int r;
1179
1180 assert(d);
1181
1182 if (d->fd < 0) {
1183 r = event_setup_timer_fd(e, d, clock);
1184 if (r < 0)
1185 return r;
1186 }
1187
1188 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1189 if (r < 0)
1190 return r;
1191
1192 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1193 if (r < 0)
1194 return r;
1195
1196 return 0;
1197 }
1198
1199 static int event_source_time_prioq_put(
1200 sd_event_source *s,
1201 struct clock_data *d) {
1202
1203 int r;
1204
1205 assert(s);
1206 assert(d);
1207
1208 r = prioq_put(d->earliest, s, &s->earliest_index);
1209 if (r < 0)
1210 return r;
1211
1212 r = prioq_put(d->latest, s, &s->latest_index);
1213 if (r < 0) {
1214 assert_se(prioq_remove(d->earliest, s, &s->earliest_index) > 0);
1215 s->earliest_index = PRIOQ_IDX_NULL;
1216 return r;
1217 }
1218
1219 d->needs_rearm = true;
1220 return 0;
1221 }
1222
1223 _public_ int sd_event_add_time(
1224 sd_event *e,
1225 sd_event_source **ret,
1226 clockid_t clock,
1227 uint64_t usec,
1228 uint64_t accuracy,
1229 sd_event_time_handler_t callback,
1230 void *userdata) {
1231
1232 EventSourceType type;
1233 _cleanup_(source_freep) sd_event_source *s = NULL;
1234 struct clock_data *d;
1235 int r;
1236
1237 assert_return(e, -EINVAL);
1238 assert_return(e = event_resolve(e), -ENOPKG);
1239 assert_return(accuracy != UINT64_MAX, -EINVAL);
1240 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1241 assert_return(!event_pid_changed(e), -ECHILD);
1242
1243 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1244 return -EOPNOTSUPP;
1245
1246 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1247 if (type < 0)
1248 return -EOPNOTSUPP;
1249
1250 if (!callback)
1251 callback = time_exit_callback;
1252
1253 assert_se(d = event_get_clock_data(e, type));
1254
1255 r = setup_clock_data(e, d, clock);
1256 if (r < 0)
1257 return r;
1258
1259 s = source_new(e, !ret, type);
1260 if (!s)
1261 return -ENOMEM;
1262
1263 s->time.next = usec;
1264 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1265 s->time.callback = callback;
1266 s->earliest_index = s->latest_index = PRIOQ_IDX_NULL;
1267 s->userdata = userdata;
1268 s->enabled = SD_EVENT_ONESHOT;
1269
1270 r = event_source_time_prioq_put(s, d);
1271 if (r < 0)
1272 return r;
1273
1274 if (ret)
1275 *ret = s;
1276 TAKE_PTR(s);
1277
1278 return 0;
1279 }
1280
1281 _public_ int sd_event_add_time_relative(
1282 sd_event *e,
1283 sd_event_source **ret,
1284 clockid_t clock,
1285 uint64_t usec,
1286 uint64_t accuracy,
1287 sd_event_time_handler_t callback,
1288 void *userdata) {
1289
1290 usec_t t;
1291 int r;
1292
1293 /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
1294 * checks for overflow. */
1295
1296 r = sd_event_now(e, clock, &t);
1297 if (r < 0)
1298 return r;
1299
1300 if (usec >= USEC_INFINITY - t)
1301 return -EOVERFLOW;
1302
1303 return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
1304 }
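
/* Usage sketch (illustrative only): arming a one-shot timer five seconds from now on
 * CLOCK_MONOTONIC with the default accuracy (0). "on_timer" is a hypothetical handler
 * matching sd_event_time_handler_t; passing NULL for ret makes the source "floating",
 * i.e. owned by the event loop.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             // fires once, since time sources default to SD_EVENT_ONESHOT
 *             return 0;
 *     }
 *
 *     ...
 *     r = sd_event_add_time_relative(e, NULL, CLOCK_MONOTONIC, 5 * USEC_PER_SEC, 0, on_timer, NULL);
 *     if (r < 0)
 *             return r;
 */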
1305
1306 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1307 assert(s);
1308
1309 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1310 }
1311
1312 _public_ int sd_event_add_signal(
1313 sd_event *e,
1314 sd_event_source **ret,
1315 int sig,
1316 sd_event_signal_handler_t callback,
1317 void *userdata) {
1318
1319 _cleanup_(source_freep) sd_event_source *s = NULL;
1320 struct signal_data *d;
1321 int r;
1322
1323 assert_return(e, -EINVAL);
1324 assert_return(e = event_resolve(e), -ENOPKG);
1325 assert_return(SIGNAL_VALID(sig), -EINVAL);
1326 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1327 assert_return(!event_pid_changed(e), -ECHILD);
1328
1329 if (!callback)
1330 callback = signal_exit_callback;
1331
1332 r = signal_is_blocked(sig);
1333 if (r < 0)
1334 return r;
1335 if (r == 0)
1336 return -EBUSY;
1337
1338 if (!e->signal_sources) {
1339 e->signal_sources = new0(sd_event_source*, _NSIG);
1340 if (!e->signal_sources)
1341 return -ENOMEM;
1342 } else if (e->signal_sources[sig])
1343 return -EBUSY;
1344
1345 s = source_new(e, !ret, SOURCE_SIGNAL);
1346 if (!s)
1347 return -ENOMEM;
1348
1349 s->signal.sig = sig;
1350 s->signal.callback = callback;
1351 s->userdata = userdata;
1352 s->enabled = SD_EVENT_ON;
1353
1354 e->signal_sources[sig] = s;
1355
1356 r = event_make_signal_data(e, sig, &d);
1357 if (r < 0)
1358 return r;
1359
1360 /* Use the signal name as description for the event source by default */
1361 (void) sd_event_source_set_description(s, signal_to_string(sig));
1362
1363 if (ret)
1364 *ret = s;
1365 TAKE_PTR(s);
1366
1367 return 0;
1368 }
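
/* Usage sketch (illustrative only): the signal must already be blocked in the calling thread,
 * otherwise the check above returns -EBUSY. "on_sigterm" is a hypothetical handler matching
 * sd_event_signal_handler_t.
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     ...
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
 *             return -errno;
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *     if (r < 0)
 *             return r;
 */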
1369
1370 static int child_exit_callback(sd_event_source *s, const siginfo_t *si, void *userdata) {
1371 assert(s);
1372
1373 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1374 }
1375
1376 static bool shall_use_pidfd(void) {
1377 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1378 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1379 }
1380
1381 _public_ int sd_event_add_child(
1382 sd_event *e,
1383 sd_event_source **ret,
1384 pid_t pid,
1385 int options,
1386 sd_event_child_handler_t callback,
1387 void *userdata) {
1388
1389 _cleanup_(source_freep) sd_event_source *s = NULL;
1390 int r;
1391
1392 assert_return(e, -EINVAL);
1393 assert_return(e = event_resolve(e), -ENOPKG);
1394 assert_return(pid > 1, -EINVAL);
1395 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1396 assert_return(options != 0, -EINVAL);
1397 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1398 assert_return(!event_pid_changed(e), -ECHILD);
1399
1400 if (!callback)
1401 callback = child_exit_callback;
1402
1403 if (e->n_online_child_sources == 0) {
1404 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1405 * for compatibility with pre-pidfd and because we don't want to reap the child processes
1406 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1407 * take effect.
1408 *
1409 * (As an optimization we only do this check on the first child event source created.) */
1410 r = signal_is_blocked(SIGCHLD);
1411 if (r < 0)
1412 return r;
1413 if (r == 0)
1414 return -EBUSY;
1415 }
1416
1417 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1418 if (r < 0)
1419 return r;
1420
1421 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1422 return -EBUSY;
1423
1424 s = source_new(e, !ret, SOURCE_CHILD);
1425 if (!s)
1426 return -ENOMEM;
1427
1428 s->wakeup = WAKEUP_EVENT_SOURCE;
1429 s->child.pid = pid;
1430 s->child.options = options;
1431 s->child.callback = callback;
1432 s->userdata = userdata;
1433 s->enabled = SD_EVENT_ONESHOT;
1434
1435 /* We always take a pidfd here if we can, even if we wait for anything other than WEXITED, so that we
1436 * pin the PID, and make regular waitid() handling race-free. */
1437
1438 if (shall_use_pidfd()) {
1439 s->child.pidfd = pidfd_open(s->child.pid, 0);
1440 if (s->child.pidfd < 0) {
1441 /* Propagate errors unless the syscall is not supported or blocked */
1442 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1443 return -errno;
1444 } else
1445 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1446 } else
1447 s->child.pidfd = -1;
1448
1449 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1450 if (r < 0)
1451 return r;
1452
1453 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1454 /* We have a pidfd and we only want to watch for exit */
1455 r = source_child_pidfd_register(s, s->enabled);
1456 if (r < 0)
1457 return r;
1458
1459 } else {
1460 /* We have no pidfd or we shall wait for some other event than WEXITED */
1461 r = event_make_signal_data(e, SIGCHLD, NULL);
1462 if (r < 0)
1463 return r;
1464
1465 e->need_process_child = true;
1466 }
1467
1468 e->n_online_child_sources++;
1469
1470 if (ret)
1471 *ret = s;
1472 TAKE_PTR(s);
1473 return 0;
1474 }
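
/* Usage sketch (illustrative only): SIGCHLD must be blocked before the first child source is
 * added (see the check above), even when a pidfd is used. "on_child" is a hypothetical handler
 * matching sd_event_child_handler_t.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_pid, si->si_code and si->si_status describe how the child changed state
 *             return 0;
 *     }
 *
 *     ...
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGCHLD);
 *     if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
 *             return -errno;
 *
 *     pid_t pid = fork();
 *     if (pid < 0)
 *             return -errno;
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);       // child
 *
 *     r = sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 *     if (r < 0)
 *             return r;
 */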
1475
1476 _public_ int sd_event_add_child_pidfd(
1477 sd_event *e,
1478 sd_event_source **ret,
1479 int pidfd,
1480 int options,
1481 sd_event_child_handler_t callback,
1482 void *userdata) {
1483
1484
1485 _cleanup_(source_freep) sd_event_source *s = NULL;
1486 pid_t pid;
1487 int r;
1488
1489 assert_return(e, -EINVAL);
1490 assert_return(e = event_resolve(e), -ENOPKG);
1491 assert_return(pidfd >= 0, -EBADF);
1492 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1493 assert_return(options != 0, -EINVAL);
1494 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1495 assert_return(!event_pid_changed(e), -ECHILD);
1496
1497 if (!callback)
1498 callback = child_exit_callback;
1499
1500 if (e->n_online_child_sources == 0) {
1501 r = signal_is_blocked(SIGCHLD);
1502 if (r < 0)
1503 return r;
1504 if (r == 0)
1505 return -EBUSY;
1506 }
1507
1508 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1509 if (r < 0)
1510 return r;
1511
1512 r = pidfd_get_pid(pidfd, &pid);
1513 if (r < 0)
1514 return r;
1515
1516 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1517 return -EBUSY;
1518
1519 s = source_new(e, !ret, SOURCE_CHILD);
1520 if (!s)
1521 return -ENOMEM;
1522
1523 s->wakeup = WAKEUP_EVENT_SOURCE;
1524 s->child.pidfd = pidfd;
1525 s->child.pid = pid;
1526 s->child.options = options;
1527 s->child.callback = callback;
1528 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1529 s->userdata = userdata;
1530 s->enabled = SD_EVENT_ONESHOT;
1531
1532 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1533 if (r < 0)
1534 return r;
1535
1536 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1537 /* We only want to watch for WEXITED */
1538 r = source_child_pidfd_register(s, s->enabled);
1539 if (r < 0)
1540 return r;
1541 } else {
1542 /* We shall wait for some other event than WEXITED */
1543 r = event_make_signal_data(e, SIGCHLD, NULL);
1544 if (r < 0)
1545 return r;
1546
1547 e->need_process_child = true;
1548 }
1549
1550 e->n_online_child_sources++;
1551
1552 if (ret)
1553 *ret = s;
1554 TAKE_PTR(s);
1555 return 0;
1556 }
1557
1558 static int generic_exit_callback(sd_event_source *s, void *userdata) {
1559 assert(s);
1560
1561 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1562 }
1563
1564 _public_ int sd_event_add_defer(
1565 sd_event *e,
1566 sd_event_source **ret,
1567 sd_event_handler_t callback,
1568 void *userdata) {
1569
1570 _cleanup_(source_freep) sd_event_source *s = NULL;
1571 int r;
1572
1573 assert_return(e, -EINVAL);
1574 assert_return(e = event_resolve(e), -ENOPKG);
1575 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1576 assert_return(!event_pid_changed(e), -ECHILD);
1577
1578 if (!callback)
1579 callback = generic_exit_callback;
1580
1581 s = source_new(e, !ret, SOURCE_DEFER);
1582 if (!s)
1583 return -ENOMEM;
1584
1585 s->defer.callback = callback;
1586 s->userdata = userdata;
1587 s->enabled = SD_EVENT_ONESHOT;
1588
1589 r = source_set_pending(s, true);
1590 if (r < 0)
1591 return r;
1592
1593 if (ret)
1594 *ret = s;
1595 TAKE_PTR(s);
1596
1597 return 0;
1598 }
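
/* Usage sketch (illustrative only): a defer source is marked pending immediately and, being
 * SD_EVENT_ONESHOT by default, runs its callback once on the next loop iteration. "on_defer"
 * is a hypothetical handler matching sd_event_handler_t.
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // runs once on the next iteration, then the source is disabled
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, on_defer, NULL);
 *     if (r < 0)
 *             return r;
 */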
1599
1600 _public_ int sd_event_add_post(
1601 sd_event *e,
1602 sd_event_source **ret,
1603 sd_event_handler_t callback,
1604 void *userdata) {
1605
1606 _cleanup_(source_freep) sd_event_source *s = NULL;
1607 int r;
1608
1609 assert_return(e, -EINVAL);
1610 assert_return(e = event_resolve(e), -ENOPKG);
1611 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1612 assert_return(!event_pid_changed(e), -ECHILD);
1613
1614 if (!callback)
1615 callback = generic_exit_callback;
1616
1617 s = source_new(e, !ret, SOURCE_POST);
1618 if (!s)
1619 return -ENOMEM;
1620
1621 s->post.callback = callback;
1622 s->userdata = userdata;
1623 s->enabled = SD_EVENT_ON;
1624
1625 r = set_ensure_put(&e->post_sources, NULL, s);
1626 if (r < 0)
1627 return r;
1628 assert(r > 0);
1629
1630 if (ret)
1631 *ret = s;
1632 TAKE_PTR(s);
1633
1634 return 0;
1635 }
1636
1637 _public_ int sd_event_add_exit(
1638 sd_event *e,
1639 sd_event_source **ret,
1640 sd_event_handler_t callback,
1641 void *userdata) {
1642
1643 _cleanup_(source_freep) sd_event_source *s = NULL;
1644 int r;
1645
1646 assert_return(e, -EINVAL);
1647 assert_return(e = event_resolve(e), -ENOPKG);
1648 assert_return(callback, -EINVAL);
1649 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1650 assert_return(!event_pid_changed(e), -ECHILD);
1651
1652 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1653 if (r < 0)
1654 return r;
1655
1656 s = source_new(e, !ret, SOURCE_EXIT);
1657 if (!s)
1658 return -ENOMEM;
1659
1660 s->exit.callback = callback;
1661 s->userdata = userdata;
1662 s->exit.prioq_index = PRIOQ_IDX_NULL;
1663 s->enabled = SD_EVENT_ONESHOT;
1664
1665 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1666 if (r < 0)
1667 return r;
1668
1669 if (ret)
1670 *ret = s;
1671 TAKE_PTR(s);
1672
1673 return 0;
1674 }
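
/* Usage sketch (illustrative only): exit sources run after sd_event_exit() has been called,
 * typically for cleanup work before sd_event_loop() returns. "on_exit_cleanup" is a
 * hypothetical handler.
 *
 *     static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *             // dispatched during the exit phase of the loop
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
 *     if (r < 0)
 *             return r;
 */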
1675
1676 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1677 assert(e);
1678
1679 if (!d)
1680 return;
1681
1682 assert(hashmap_isempty(d->inodes));
1683 assert(hashmap_isempty(d->wd));
1684
1685 if (d->buffer_filled > 0)
1686 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1687
1688 hashmap_free(d->inodes);
1689 hashmap_free(d->wd);
1690
1691 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1692
1693 if (d->fd >= 0) {
1694 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1695 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1696
1697 safe_close(d->fd);
1698 }
1699 free(d);
1700 }
1701
1702 static int event_make_inotify_data(
1703 sd_event *e,
1704 int64_t priority,
1705 struct inotify_data **ret) {
1706
1707 _cleanup_close_ int fd = -1;
1708 struct inotify_data *d;
1709 int r;
1710
1711 assert(e);
1712
1713 d = hashmap_get(e->inotify_data, &priority);
1714 if (d) {
1715 if (ret)
1716 *ret = d;
1717 return 0;
1718 }
1719
1720 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1721 if (fd < 0)
1722 return -errno;
1723
1724 fd = fd_move_above_stdio(fd);
1725
1726 d = new(struct inotify_data, 1);
1727 if (!d)
1728 return -ENOMEM;
1729
1730 *d = (struct inotify_data) {
1731 .wakeup = WAKEUP_INOTIFY_DATA,
1732 .fd = TAKE_FD(fd),
1733 .priority = priority,
1734 };
1735
1736 r = hashmap_ensure_put(&e->inotify_data, &uint64_hash_ops, &d->priority, d);
1737 if (r < 0) {
1738 d->fd = safe_close(d->fd);
1739 free(d);
1740 return r;
1741 }
1742
1743 struct epoll_event ev = {
1744 .events = EPOLLIN,
1745 .data.ptr = d,
1746 };
1747
1748 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1749 r = -errno;
1750 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1751 * remove the fd from the epoll first, which we don't want as we couldn't
1752 * add it in the first place. */
1753 event_free_inotify_data(e, d);
1754 return r;
1755 }
1756
1757 if (ret)
1758 *ret = d;
1759
1760 return 1;
1761 }
1762
1763 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1764 int r;
1765
1766 assert(x);
1767 assert(y);
1768
1769 r = CMP(x->dev, y->dev);
1770 if (r != 0)
1771 return r;
1772
1773 return CMP(x->ino, y->ino);
1774 }
1775
1776 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1777 assert(d);
1778
1779 siphash24_compress(&d->dev, sizeof(d->dev), state);
1780 siphash24_compress(&d->ino, sizeof(d->ino), state);
1781 }
1782
1783 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1784
1785 static void event_free_inode_data(
1786 sd_event *e,
1787 struct inode_data *d) {
1788
1789 assert(e);
1790
1791 if (!d)
1792 return;
1793
1794 assert(!d->event_sources);
1795
1796 if (d->fd >= 0) {
1797 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1798 safe_close(d->fd);
1799 }
1800
1801 if (d->inotify_data) {
1802
1803 if (d->wd >= 0) {
1804 if (d->inotify_data->fd >= 0) {
1805 /* So here's a problem. At the time this runs the watch descriptor might already be
1806 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1807 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1808 * likely case to happen. */
1809
1810 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1811 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1812 }
1813
1814 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1815 }
1816
1817 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1818 }
1819
1820 free(d);
1821 }
1822
1823 static void event_gc_inode_data(
1824 sd_event *e,
1825 struct inode_data *d) {
1826
1827 struct inotify_data *inotify_data;
1828
1829 assert(e);
1830
1831 if (!d)
1832 return;
1833
1834 if (d->event_sources)
1835 return;
1836
1837 inotify_data = d->inotify_data;
1838 event_free_inode_data(e, d);
1839
1840 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1841 event_free_inotify_data(e, inotify_data);
1842 }
1843
1844 static int event_make_inode_data(
1845 sd_event *e,
1846 struct inotify_data *inotify_data,
1847 dev_t dev,
1848 ino_t ino,
1849 struct inode_data **ret) {
1850
1851 struct inode_data *d, key;
1852 int r;
1853
1854 assert(e);
1855 assert(inotify_data);
1856
1857 key = (struct inode_data) {
1858 .ino = ino,
1859 .dev = dev,
1860 };
1861
1862 d = hashmap_get(inotify_data->inodes, &key);
1863 if (d) {
1864 if (ret)
1865 *ret = d;
1866
1867 return 0;
1868 }
1869
1870 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1871 if (r < 0)
1872 return r;
1873
1874 d = new(struct inode_data, 1);
1875 if (!d)
1876 return -ENOMEM;
1877
1878 *d = (struct inode_data) {
1879 .dev = dev,
1880 .ino = ino,
1881 .wd = -1,
1882 .fd = -1,
1883 .inotify_data = inotify_data,
1884 };
1885
1886 r = hashmap_put(inotify_data->inodes, d, d);
1887 if (r < 0) {
1888 free(d);
1889 return r;
1890 }
1891
1892 if (ret)
1893 *ret = d;
1894
1895 return 1;
1896 }
1897
1898 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1899 bool excl_unlink = true;
1900 uint32_t combined = 0;
1901 sd_event_source *s;
1902
1903 assert(d);
1904
1905 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1906 * the IN_EXCL_UNLINK flag is ANDed instead.
1907 *
1908 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1909 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1910 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1911 * events we don't care for client-side. */
1912
1913 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1914
1915 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1916 excl_unlink = false;
1917
1918 combined |= s->inotify.mask;
1919 }
1920
1921 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1922 }
1923
1924 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1925 uint32_t combined_mask;
1926 int wd, r;
1927
1928 assert(d);
1929 assert(d->fd >= 0);
1930
1931 combined_mask = inode_data_determine_mask(d);
1932
1933 if (d->wd >= 0 && combined_mask == d->combined_mask)
1934 return 0;
1935
1936 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1937 if (r < 0)
1938 return r;
1939
1940 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1941 if (wd < 0)
1942 return -errno;
1943
1944 if (d->wd < 0) {
1945 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1946 if (r < 0) {
1947 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1948 return r;
1949 }
1950
1951 d->wd = wd;
1952
1953 } else if (d->wd != wd) {
1954
1955 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1956 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1957 return -EINVAL;
1958 }
1959
1960 d->combined_mask = combined_mask;
1961 return 1;
1962 }
1963
1964 static int inotify_exit_callback(sd_event_source *s, const struct inotify_event *event, void *userdata) {
1965 assert(s);
1966
1967 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1968 }
1969
1970 _public_ int sd_event_add_inotify(
1971 sd_event *e,
1972 sd_event_source **ret,
1973 const char *path,
1974 uint32_t mask,
1975 sd_event_inotify_handler_t callback,
1976 void *userdata) {
1977
1978 struct inotify_data *inotify_data = NULL;
1979 struct inode_data *inode_data = NULL;
1980 _cleanup_close_ int fd = -1;
1981 _cleanup_(source_freep) sd_event_source *s = NULL;
1982 struct stat st;
1983 int r;
1984
1985 assert_return(e, -EINVAL);
1986 assert_return(e = event_resolve(e), -ENOPKG);
1987 assert_return(path, -EINVAL);
1988 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1989 assert_return(!event_pid_changed(e), -ECHILD);
1990
1991 if (!callback)
1992 callback = inotify_exit_callback;
1993
1994 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1995 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1996 * the caller may not specify the flag directly. */
1997 if (mask & IN_MASK_ADD)
1998 return -EINVAL;
1999
2000 fd = open(path, O_PATH|O_CLOEXEC|
2001 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
2002 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
2003 if (fd < 0)
2004 return -errno;
2005
2006 if (fstat(fd, &st) < 0)
2007 return -errno;
2008
2009 s = source_new(e, !ret, SOURCE_INOTIFY);
2010 if (!s)
2011 return -ENOMEM;
2012
2013 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
2014 s->inotify.mask = mask;
2015 s->inotify.callback = callback;
2016 s->userdata = userdata;
2017
2018 /* Allocate an inotify object for this priority, and an inode object within it */
2019 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
2020 if (r < 0)
2021 return r;
2022
2023 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
2024 if (r < 0) {
2025 event_free_inotify_data(e, inotify_data);
2026 return r;
2027 }
2028
2029 /* Keep the O_PATH fd around until the first iteration of the loop, so that until then we can still change
2030 * the priority of the event source, for which we need the original inode. */
2031 if (inode_data->fd < 0) {
2032 inode_data->fd = TAKE_FD(fd);
2033 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
2034 }
2035
2036 /* Link our event source to the inode data object */
2037 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
2038 s->inotify.inode_data = inode_data;
2039
2040 /* Actually realize the watch now */
2041 r = inode_data_realize_watch(e, inode_data);
2042 if (r < 0)
2043 return r;
2044
2045 (void) sd_event_source_set_description(s, path);
2046
2047 if (ret)
2048 *ret = s;
2049 TAKE_PTR(s);
2050
2051 return 0;
2052 }
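
/* Usage sketch (illustrative only): watching a directory for newly created files. The path and
 * the handler name are hypothetical; remember that IN_MASK_ADD is rejected above because masks
 * are coalesced per inode by this code.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             // ev->mask and ev->name (if present) describe what happened
 *             return 0;
 *     }
 *
 *     ...
 *     r = sd_event_add_inotify(e, NULL, "/run/my-dir", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 *     if (r < 0)
 *             return r;
 */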
2053
2054 static sd_event_source* event_source_free(sd_event_source *s) {
2055 if (!s)
2056 return NULL;
2057
2058 /* Here's a special hack: when we are called from a
2059 * dispatch handler we won't free the event source
2060 * immediately, but we will detach the fd from the
2061 * epoll. This way it is safe for the caller to unref
2062 * the event source and immediately close the fd, but
2063 * we still retain a valid event source object after
2064 * the callback. */
2065
2066 if (s->dispatching) {
2067 if (s->type == SOURCE_IO)
2068 source_io_unregister(s);
2069
2070 source_disconnect(s);
2071 } else
2072 source_free(s);
2073
2074 return NULL;
2075 }
2076
2077 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
2078
2079 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
2080 assert_return(s, -EINVAL);
2081 assert_return(!event_pid_changed(s->event), -ECHILD);
2082
2083 return free_and_strdup(&s->description, description);
2084 }
2085
2086 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
2087 assert_return(s, -EINVAL);
2088 assert_return(description, -EINVAL);
2089 assert_return(!event_pid_changed(s->event), -ECHILD);
2090
2091 if (!s->description)
2092 return -ENXIO;
2093
2094 *description = s->description;
2095 return 0;
2096 }
2097
2098 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
2099 assert_return(s, NULL);
2100
2101 return s->event;
2102 }
2103
2104 _public_ int sd_event_source_get_pending(sd_event_source *s) {
2105 assert_return(s, -EINVAL);
2106 assert_return(s->type != SOURCE_EXIT, -EDOM);
2107 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2108 assert_return(!event_pid_changed(s->event), -ECHILD);
2109
2110 return s->pending;
2111 }
2112
2113 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2114 assert_return(s, -EINVAL);
2115 assert_return(s->type == SOURCE_IO, -EDOM);
2116 assert_return(!event_pid_changed(s->event), -ECHILD);
2117
2118 return s->io.fd;
2119 }
2120
2121 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2122 int r;
2123
2124 assert_return(s, -EINVAL);
2125 assert_return(fd >= 0, -EBADF);
2126 assert_return(s->type == SOURCE_IO, -EDOM);
2127 assert_return(!event_pid_changed(s->event), -ECHILD);
2128
2129 if (s->io.fd == fd)
2130 return 0;
2131
2132 if (event_source_is_offline(s)) {
2133 s->io.fd = fd;
2134 s->io.registered = false;
2135 } else {
2136 int saved_fd;
2137
2138 saved_fd = s->io.fd;
2139 assert(s->io.registered);
2140
2141 s->io.fd = fd;
2142 s->io.registered = false;
2143
2144 r = source_io_register(s, s->enabled, s->io.events);
2145 if (r < 0) {
2146 s->io.fd = saved_fd;
2147 s->io.registered = true;
2148 return r;
2149 }
2150
2151 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2152 }
2153
2154 return 0;
2155 }
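
/* A minimal usage sketch (illustration only, not part of this file): swapping the fd of an
 * existing IO source, e.g. after re-connecting a socket, instead of destroying and
 * re-creating the source. If the source is online, the implementation above registers the
 * new fd in epoll before removing the old one, so a failure leaves the old registration
 * intact. The helper name is hypothetical. */
#include <unistd.h>
#include <systemd/sd-event.h>

static int swap_connection_fd(sd_event_source *s, int new_fd) {
        int old_fd, r;

        old_fd = sd_event_source_get_io_fd(s);
        if (old_fd < 0)
                return old_fd;

        r = sd_event_source_set_io_fd(s, new_fd);
        if (r < 0)
                return r;

        /* After the swap the source only references new_fd, so the old fd is ours to close,
         * regardless of the io_fd_own setting. */
        return close(old_fd);
}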
2156
2157 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2158 assert_return(s, -EINVAL);
2159 assert_return(s->type == SOURCE_IO, -EDOM);
2160
2161 return s->io.owned;
2162 }
2163
2164 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2165 assert_return(s, -EINVAL);
2166 assert_return(s->type == SOURCE_IO, -EDOM);
2167
2168 s->io.owned = own;
2169 return 0;
2170 }
2171
2172 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2173 assert_return(s, -EINVAL);
2174 assert_return(events, -EINVAL);
2175 assert_return(s->type == SOURCE_IO, -EDOM);
2176 assert_return(!event_pid_changed(s->event), -ECHILD);
2177
2178 *events = s->io.events;
2179 return 0;
2180 }
2181
2182 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2183 int r;
2184
2185 assert_return(s, -EINVAL);
2186 assert_return(s->type == SOURCE_IO, -EDOM);
2187 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2188 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2189 assert_return(!event_pid_changed(s->event), -ECHILD);
2190
2191 /* edge-triggered updates are never skipped, so we can reset edges */
2192 if (s->io.events == events && !(events & EPOLLET))
2193 return 0;
2194
2195 r = source_set_pending(s, false);
2196 if (r < 0)
2197 return r;
2198
2199 if (event_source_is_online(s)) {
2200 r = source_io_register(s, s->enabled, events);
2201 if (r < 0)
2202 return r;
2203 }
2204
2205 s->io.events = events;
2206
2207 return 0;
2208 }
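
/* A minimal usage sketch (illustration only, not part of this file): toggling EPOLLOUT on an
 * IO source, the usual pattern for a socket with an outgoing write queue. When the source is
 * online, the mask change is applied to epoll immediately via source_io_register() above. */
#include <sys/epoll.h>
#include <systemd/sd-event.h>

static int update_write_interest(sd_event_source *s, int have_pending_writes) {
        uint32_t events = EPOLLIN;

        if (have_pending_writes)
                events |= EPOLLOUT;

        return sd_event_source_set_io_events(s, events);
}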
2209
2210 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2211 assert_return(s, -EINVAL);
2212 assert_return(revents, -EINVAL);
2213 assert_return(s->type == SOURCE_IO, -EDOM);
2214 assert_return(s->pending, -ENODATA);
2215 assert_return(!event_pid_changed(s->event), -ECHILD);
2216
2217 *revents = s->io.revents;
2218 return 0;
2219 }
2220
2221 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2222 assert_return(s, -EINVAL);
2223 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2224 assert_return(!event_pid_changed(s->event), -ECHILD);
2225
2226 return s->signal.sig;
2227 }
2228
2229 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2230 assert_return(s, -EINVAL);
2231 assert_return(!event_pid_changed(s->event), -ECHILD);
2232
2233 *priority = s->priority;
2234 return 0;
2235 }
2236
2237 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2238 bool rm_inotify = false, rm_inode = false;
2239 struct inotify_data *new_inotify_data = NULL;
2240 struct inode_data *new_inode_data = NULL;
2241 int r;
2242
2243 assert_return(s, -EINVAL);
2244 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2245 assert_return(!event_pid_changed(s->event), -ECHILD);
2246
2247 if (s->priority == priority)
2248 return 0;
2249
2250 if (s->type == SOURCE_INOTIFY) {
2251 struct inode_data *old_inode_data;
2252
2253 assert(s->inotify.inode_data);
2254 old_inode_data = s->inotify.inode_data;
2255
2256                 /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2257                  * anymore. Note that we close any such fds when entering the next event loop iteration, i.e. for inotify
2258                  * event sources we allow priority changes only until the first following iteration. */
2259 if (old_inode_data->fd < 0)
2260 return -EOPNOTSUPP;
2261
2262 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2263 if (r < 0)
2264 return r;
2265 rm_inotify = r > 0;
2266
2267 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2268 if (r < 0)
2269 goto fail;
2270 rm_inode = r > 0;
2271
2272 if (new_inode_data->fd < 0) {
2273 /* Duplicate the fd for the new inode object if we don't have any yet */
2274 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2275 if (new_inode_data->fd < 0) {
2276 r = -errno;
2277 goto fail;
2278 }
2279
2280 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2281 }
2282
2283 /* Move the event source to the new inode data structure */
2284 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2285 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2286 s->inotify.inode_data = new_inode_data;
2287
2288 /* Now create the new watch */
2289 r = inode_data_realize_watch(s->event, new_inode_data);
2290 if (r < 0) {
2291 /* Move it back */
2292 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2293 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2294 s->inotify.inode_data = old_inode_data;
2295 goto fail;
2296 }
2297
2298 s->priority = priority;
2299
2300 event_gc_inode_data(s->event, old_inode_data);
2301
2302 } else if (s->type == SOURCE_SIGNAL && event_source_is_online(s)) {
2303 struct signal_data *old, *d;
2304
2305 /* Move us from the signalfd belonging to the old
2306 * priority to the signalfd of the new priority */
2307
2308 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2309
2310 s->priority = priority;
2311
2312 r = event_make_signal_data(s->event, s->signal.sig, &d);
2313 if (r < 0) {
2314 s->priority = old->priority;
2315 return r;
2316 }
2317
2318 event_unmask_signal_data(s->event, old, s->signal.sig);
2319 } else
2320 s->priority = priority;
2321
2322 event_source_pp_prioq_reshuffle(s);
2323
2324 if (s->type == SOURCE_EXIT)
2325 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2326
2327 return 0;
2328
2329 fail:
2330 if (rm_inode)
2331 event_free_inode_data(s->event, new_inode_data);
2332
2333 if (rm_inotify)
2334 event_free_inotify_data(s->event, new_inotify_data);
2335
2336 return r;
2337 }
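
/* A minimal usage sketch (illustration only, not part of this file): lower numerical priority
 * dispatches first. Note the restriction implemented above: for inotify sources the priority
 * can only be changed before the first event loop iteration following creation, because the
 * original O_PATH fd is closed at that point and -EOPNOTSUPP is returned afterwards.
 * The helper name is hypothetical. */
#include <systemd/sd-event.h>

static int prioritize(sd_event_source *important, sd_event_source *background) {
        int r;

        r = sd_event_source_set_priority(important, SD_EVENT_PRIORITY_IMPORTANT);
        if (r < 0)
                return r;

        return sd_event_source_set_priority(background, SD_EVENT_PRIORITY_IDLE);
}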
2338
2339 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *ret) {
2340 assert_return(s, -EINVAL);
2341 assert_return(!event_pid_changed(s->event), -ECHILD);
2342
2343 if (ret)
2344 *ret = s->enabled;
2345
2346 return s->enabled != SD_EVENT_OFF;
2347 }
2348
2349 static int event_source_offline(
2350 sd_event_source *s,
2351 int enabled,
2352 bool ratelimited) {
2353
2354 bool was_offline;
2355 int r;
2356
2357 assert(s);
2358 assert(enabled == SD_EVENT_OFF || ratelimited);
2359
2360 /* Unset the pending flag when this event source is disabled */
2361 if (s->enabled != SD_EVENT_OFF &&
2362 enabled == SD_EVENT_OFF &&
2363 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2364 r = source_set_pending(s, false);
2365 if (r < 0)
2366 return r;
2367 }
2368
2369 was_offline = event_source_is_offline(s);
2370 s->enabled = enabled;
2371 s->ratelimited = ratelimited;
2372
2373 switch (s->type) {
2374
2375 case SOURCE_IO:
2376 source_io_unregister(s);
2377 break;
2378
2379 case SOURCE_TIME_REALTIME:
2380 case SOURCE_TIME_BOOTTIME:
2381 case SOURCE_TIME_MONOTONIC:
2382 case SOURCE_TIME_REALTIME_ALARM:
2383 case SOURCE_TIME_BOOTTIME_ALARM:
2384 event_source_time_prioq_reshuffle(s);
2385 break;
2386
2387 case SOURCE_SIGNAL:
2388 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2389 break;
2390
2391 case SOURCE_CHILD:
2392 if (!was_offline) {
2393 assert(s->event->n_online_child_sources > 0);
2394 s->event->n_online_child_sources--;
2395 }
2396
2397 if (EVENT_SOURCE_WATCH_PIDFD(s))
2398 source_child_pidfd_unregister(s);
2399 else
2400 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2401 break;
2402
2403 case SOURCE_EXIT:
2404 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2405 break;
2406
2407 case SOURCE_DEFER:
2408 case SOURCE_POST:
2409 case SOURCE_INOTIFY:
2410 break;
2411
2412 default:
2413 assert_not_reached("Wut? I shouldn't exist.");
2414 }
2415
2416 return 1;
2417 }
2418
2419 static int event_source_online(
2420 sd_event_source *s,
2421 int enabled,
2422 bool ratelimited) {
2423
2424 bool was_online;
2425 int r;
2426
2427 assert(s);
2428 assert(enabled != SD_EVENT_OFF || !ratelimited);
2429
2430 /* Unset the pending flag when this event source is enabled */
2431 if (s->enabled == SD_EVENT_OFF &&
2432 enabled != SD_EVENT_OFF &&
2433 !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2434 r = source_set_pending(s, false);
2435 if (r < 0)
2436 return r;
2437 }
2438
2439 /* Are we really ready for onlining? */
2440 if (enabled == SD_EVENT_OFF || ratelimited) {
2441                 /* Nope, we are not ready for onlining, so just update the precise state and exit */
2442 s->enabled = enabled;
2443 s->ratelimited = ratelimited;
2444 return 0;
2445 }
2446
2447 was_online = event_source_is_online(s);
2448
2449 switch (s->type) {
2450 case SOURCE_IO:
2451 r = source_io_register(s, enabled, s->io.events);
2452 if (r < 0)
2453 return r;
2454 break;
2455
2456 case SOURCE_SIGNAL:
2457 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2458 if (r < 0) {
2459 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2460 return r;
2461 }
2462
2463 break;
2464
2465 case SOURCE_CHILD:
2466 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2467 /* yes, we have pidfd */
2468
2469 r = source_child_pidfd_register(s, enabled);
2470 if (r < 0)
2471 return r;
2472 } else {
2473                         /* no pidfd, or something other than WEXITED to watch for */
2474
2475 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2476 if (r < 0) {
2477 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2478 return r;
2479 }
2480 }
2481
2482 if (!was_online)
2483 s->event->n_online_child_sources++;
2484 break;
2485
2486 case SOURCE_TIME_REALTIME:
2487 case SOURCE_TIME_BOOTTIME:
2488 case SOURCE_TIME_MONOTONIC:
2489 case SOURCE_TIME_REALTIME_ALARM:
2490 case SOURCE_TIME_BOOTTIME_ALARM:
2491 case SOURCE_EXIT:
2492 case SOURCE_DEFER:
2493 case SOURCE_POST:
2494 case SOURCE_INOTIFY:
2495 break;
2496
2497 default:
2498 assert_not_reached("Wut? I shouldn't exist.");
2499 }
2500
2501 s->enabled = enabled;
2502 s->ratelimited = ratelimited;
2503
2504 /* Non-failing operations below */
2505 switch (s->type) {
2506 case SOURCE_TIME_REALTIME:
2507 case SOURCE_TIME_BOOTTIME:
2508 case SOURCE_TIME_MONOTONIC:
2509 case SOURCE_TIME_REALTIME_ALARM:
2510 case SOURCE_TIME_BOOTTIME_ALARM:
2511 event_source_time_prioq_reshuffle(s);
2512 break;
2513
2514 case SOURCE_EXIT:
2515 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2516 break;
2517
2518 default:
2519 break;
2520 }
2521
2522 return 1;
2523 }
2524
2525 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2526 int r;
2527
2528 assert_return(s, -EINVAL);
2529 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2530 assert_return(!event_pid_changed(s->event), -ECHILD);
2531
2532 /* If we are dead anyway, we are fine with turning off sources, but everything else needs to fail. */
2533 if (s->event->state == SD_EVENT_FINISHED)
2534 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2535
2536 if (s->enabled == m) /* No change? */
2537 return 0;
2538
2539 if (m == SD_EVENT_OFF)
2540 r = event_source_offline(s, m, s->ratelimited);
2541 else {
2542 if (s->enabled != SD_EVENT_OFF) {
2543 /* Switching from "on" to "oneshot" or back? If that's the case, we can take a shortcut, the
2544 * event source is already enabled after all. */
2545 s->enabled = m;
2546 return 0;
2547 }
2548
2549 r = event_source_online(s, m, s->ratelimited);
2550 }
2551 if (r < 0)
2552 return r;
2553
2554 event_source_pp_prioq_reshuffle(s);
2555 return 0;
2556 }
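
/* A minimal usage sketch (illustration only, not part of this file): pausing and resuming an
 * event source. SD_EVENT_ONESHOT arms the source for a single dispatch, after which
 * source_dispatch() below switches it back to SD_EVENT_OFF automatically. */
#include <systemd/sd-event.h>

static int pause_source(sd_event_source *s)  { return sd_event_source_set_enabled(s, SD_EVENT_OFF); }
static int resume_once(sd_event_source *s)   { return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT); }
static int resume_source(sd_event_source *s) { return sd_event_source_set_enabled(s, SD_EVENT_ON); }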
2557
2558 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2559 assert_return(s, -EINVAL);
2560 assert_return(usec, -EINVAL);
2561 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2562 assert_return(!event_pid_changed(s->event), -ECHILD);
2563
2564 *usec = s->time.next;
2565 return 0;
2566 }
2567
2568 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2569 int r;
2570
2571 assert_return(s, -EINVAL);
2572 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2573 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2574 assert_return(!event_pid_changed(s->event), -ECHILD);
2575
2576 r = source_set_pending(s, false);
2577 if (r < 0)
2578 return r;
2579
2580 s->time.next = usec;
2581
2582 event_source_time_prioq_reshuffle(s);
2583 return 0;
2584 }
2585
2586 _public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2587 usec_t t;
2588 int r;
2589
2590 assert_return(s, -EINVAL);
2591 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2592
2593 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2594 if (r < 0)
2595 return r;
2596
2597 usec = usec_add(t, usec);
2598 if (usec == USEC_INFINITY)
2599 return -EOVERFLOW;
2600
2601 return sd_event_source_set_time(s, usec);
2602 }
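
/* A minimal usage sketch (illustration only, not part of this file): re-arming a timer
 * source relative to the current time of its clock, e.g. from inside its own callback to get
 * a repeating timer. Assumes a CLOCK_MONOTONIC source created with sd_event_add_time(),
 * which is initially enabled as SD_EVENT_ONESHOT and therefore needs re-enabling. */
#include <systemd/sd-event.h>

static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
        int r;

        /* ... do the periodic work here ... */

        /* fire again 5 seconds (in microseconds) from now on this source's clock */
        r = sd_event_source_set_time_relative(s, 5 * 1000000ULL);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}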
2603
2604 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2605 assert_return(s, -EINVAL);
2606 assert_return(usec, -EINVAL);
2607 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2608 assert_return(!event_pid_changed(s->event), -ECHILD);
2609
2610 *usec = s->time.accuracy;
2611 return 0;
2612 }
2613
2614 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2615 int r;
2616
2617 assert_return(s, -EINVAL);
2618 assert_return(usec != UINT64_MAX, -EINVAL);
2619 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2620 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2621 assert_return(!event_pid_changed(s->event), -ECHILD);
2622
2623 r = source_set_pending(s, false);
2624 if (r < 0)
2625 return r;
2626
2627 if (usec == 0)
2628 usec = DEFAULT_ACCURACY_USEC;
2629
2630 s->time.accuracy = usec;
2631
2632 event_source_time_prioq_reshuffle(s);
2633 return 0;
2634 }
2635
2636 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2637 assert_return(s, -EINVAL);
2638 assert_return(clock, -EINVAL);
2639 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2640 assert_return(!event_pid_changed(s->event), -ECHILD);
2641
2642 *clock = event_source_type_to_clock(s->type);
2643 return 0;
2644 }
2645
2646 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2647 assert_return(s, -EINVAL);
2648 assert_return(pid, -EINVAL);
2649 assert_return(s->type == SOURCE_CHILD, -EDOM);
2650 assert_return(!event_pid_changed(s->event), -ECHILD);
2651
2652 *pid = s->child.pid;
2653 return 0;
2654 }
2655
2656 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2657 assert_return(s, -EINVAL);
2658 assert_return(s->type == SOURCE_CHILD, -EDOM);
2659 assert_return(!event_pid_changed(s->event), -ECHILD);
2660
2661 if (s->child.pidfd < 0)
2662 return -EOPNOTSUPP;
2663
2664 return s->child.pidfd;
2665 }
2666
2667 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2668 assert_return(s, -EINVAL);
2669 assert_return(s->type == SOURCE_CHILD, -EDOM);
2670 assert_return(!event_pid_changed(s->event), -ECHILD);
2671 assert_return(SIGNAL_VALID(sig), -EINVAL);
2672
2673 /* If we already have seen indication the process exited refuse sending a signal early. This way we
2674 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2675 * available. */
2676 if (s->child.exited)
2677 return -ESRCH;
2678
2679 if (s->child.pidfd >= 0) {
2680 siginfo_t copy;
2681
2682 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2683 * structure here */
2684 if (si)
2685 copy = *si;
2686
2687 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2688 /* Let's propagate the error only if the system call is not implemented or prohibited */
2689 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2690 return -errno;
2691 } else
2692 return 0;
2693 }
2694
2695 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2696 * this here. */
2697 if (flags != 0)
2698 return -EOPNOTSUPP;
2699
2700 if (si) {
2701 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2702 siginfo_t copy = *si;
2703
2704 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2705 return -errno;
2706 } else if (kill(s->child.pid, sig) < 0)
2707 return -errno;
2708
2709 return 0;
2710 }
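
/* A minimal usage sketch (illustration only, not part of this file): asking a watched child
 * to terminate. The function above prefers pidfd_send_signal() when a pidfd is available,
 * which is safe against PID reuse, and refuses with -ESRCH once the child is known to have
 * exited. The helper name is hypothetical. */
#include <signal.h>
#include <systemd/sd-event.h>

static int stop_child(sd_event_source *child_source) {
        return sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
}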
2711
2712 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2713 assert_return(s, -EINVAL);
2714 assert_return(s->type == SOURCE_CHILD, -EDOM);
2715
2716 if (s->child.pidfd < 0)
2717 return -EOPNOTSUPP;
2718
2719 return s->child.pidfd_owned;
2720 }
2721
2722 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2723 assert_return(s, -EINVAL);
2724 assert_return(s->type == SOURCE_CHILD, -EDOM);
2725
2726 if (s->child.pidfd < 0)
2727 return -EOPNOTSUPP;
2728
2729 s->child.pidfd_owned = own;
2730 return 0;
2731 }
2732
2733 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2734 assert_return(s, -EINVAL);
2735 assert_return(s->type == SOURCE_CHILD, -EDOM);
2736
2737 return s->child.process_owned;
2738 }
2739
2740 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2741 assert_return(s, -EINVAL);
2742 assert_return(s->type == SOURCE_CHILD, -EDOM);
2743
2744 s->child.process_owned = own;
2745 return 0;
2746 }
2747
2748 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2749 assert_return(s, -EINVAL);
2750 assert_return(mask, -EINVAL);
2751 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2752 assert_return(!event_pid_changed(s->event), -ECHILD);
2753
2754 *mask = s->inotify.mask;
2755 return 0;
2756 }
2757
2758 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2759 int r;
2760
2761 assert_return(s, -EINVAL);
2762 assert_return(s->type != SOURCE_EXIT, -EDOM);
2763 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2764 assert_return(!event_pid_changed(s->event), -ECHILD);
2765
2766 if (s->prepare == callback)
2767 return 0;
2768
2769 if (callback && s->prepare) {
2770 s->prepare = callback;
2771 return 0;
2772 }
2773
2774 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2775 if (r < 0)
2776 return r;
2777
2778 s->prepare = callback;
2779
2780 if (callback) {
2781 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2782 if (r < 0)
2783 return r;
2784 } else
2785 prioq_remove(s->event->prepare, s, &s->prepare_index);
2786
2787 return 0;
2788 }
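
/* A minimal usage sketch (illustration only, not part of this file): a prepare callback runs
 * from event_prepare() below, once per loop iteration before the loop polls, and is
 * typically used to refresh whatever state the source depends on. Both helper names are
 * hypothetical. */
#include <systemd/sd-event.h>

static int on_prepare(sd_event_source *s, void *userdata) {
        /* recompute whatever the source needs before the loop goes to sleep */
        return 0;
}

static int hook_prepare(sd_event_source *s) {
        return sd_event_source_set_prepare(s, on_prepare);
}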
2789
2790 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2791 assert_return(s, NULL);
2792
2793 return s->userdata;
2794 }
2795
2796 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2797 void *ret;
2798
2799 assert_return(s, NULL);
2800
2801 ret = s->userdata;
2802 s->userdata = userdata;
2803
2804 return ret;
2805 }
2806
2807 static int event_source_enter_ratelimited(sd_event_source *s) {
2808 int r;
2809
2810 assert(s);
2811
2812 /* When an event source becomes ratelimited, we place it in the CLOCK_MONOTONIC priority queue, with
2813 * the end of the rate limit time window, much as if it was a timer event source. */
2814
2815 if (s->ratelimited)
2816                 return 0; /* Already ratelimited, hence this is a NOP */
2817
2818 /* Make sure we can install a CLOCK_MONOTONIC event further down. */
2819 r = setup_clock_data(s->event, &s->event->monotonic, CLOCK_MONOTONIC);
2820 if (r < 0)
2821 return r;
2822
2823 /* Timer event sources are already using the earliest/latest queues for the timer scheduling. Let's
2824 * first remove them from the prioq appropriate for their own clock, so that we can use the prioq
2825 * fields of the event source then for adding it to the CLOCK_MONOTONIC prioq instead. */
2826 if (EVENT_SOURCE_IS_TIME(s->type))
2827 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2828
2829 /* Now, let's add the event source to the monotonic clock instead */
2830 r = event_source_time_prioq_put(s, &s->event->monotonic);
2831 if (r < 0)
2832 goto fail;
2833
2834 /* And let's take the event source officially offline */
2835 r = event_source_offline(s, s->enabled, /* ratelimited= */ true);
2836 if (r < 0) {
2837 event_source_time_prioq_remove(s, &s->event->monotonic);
2838 goto fail;
2839 }
2840
2841 event_source_pp_prioq_reshuffle(s);
2842
2843 log_debug("Event source %p (%s) entered rate limit state.", s, strna(s->description));
2844 return 0;
2845
2846 fail:
2847 /* Reinstall time event sources in the priority queue as before. This shouldn't fail, since the queue
2848 * space for it should already be allocated. */
2849 if (EVENT_SOURCE_IS_TIME(s->type))
2850 assert_se(event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type)) >= 0);
2851
2852 return r;
2853 }
2854
2855 static int event_source_leave_ratelimit(sd_event_source *s) {
2856 int r;
2857
2858 assert(s);
2859
2860 if (!s->ratelimited)
2861 return 0;
2862
2863 /* Let's take the event source out of the monotonic prioq first. */
2864 event_source_time_prioq_remove(s, &s->event->monotonic);
2865
2866 /* Let's then add the event source to its native clock prioq again — if this is a timer event source */
2867 if (EVENT_SOURCE_IS_TIME(s->type)) {
2868 r = event_source_time_prioq_put(s, event_get_clock_data(s->event, s->type));
2869 if (r < 0)
2870 goto fail;
2871 }
2872
2873 /* Let's try to take it online again. */
2874 r = event_source_online(s, s->enabled, /* ratelimited= */ false);
2875 if (r < 0) {
2876 /* Do something roughly sensible when this failed: undo the two prioq ops above */
2877 if (EVENT_SOURCE_IS_TIME(s->type))
2878 event_source_time_prioq_remove(s, event_get_clock_data(s->event, s->type));
2879
2880 goto fail;
2881 }
2882
2883 event_source_pp_prioq_reshuffle(s);
2884 ratelimit_reset(&s->rate_limit);
2885
2886 log_debug("Event source %p (%s) left rate limit state.", s, strna(s->description));
2887 return 0;
2888
2889 fail:
2890         /* Do something somewhat reasonable when we cannot move an event source out of ratelimited mode:
2891 * simply put it back in it, maybe we can then process it more successfully next iteration. */
2892 assert_se(event_source_time_prioq_put(s, &s->event->monotonic) >= 0);
2893
2894 return r;
2895 }
2896
2897 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2898 usec_t c;
2899 assert(e);
2900 assert(a <= b);
2901
2902 if (a <= 0)
2903 return 0;
2904 if (a >= USEC_INFINITY)
2905 return USEC_INFINITY;
2906
2907 if (b <= a + 1)
2908 return a;
2909
2910 initialize_perturb(e);
2911
2912 /*
2913 Find a good time to wake up again between times a and b. We
2914 have two goals here:
2915
2916 a) We want to wake up as seldom as possible, hence prefer
2917 later times over earlier times.
2918
2919 b) But if we have to wake up, then let's make sure to
2920 dispatch as much as possible on the entire system.
2921
2922 We implement this by waking up everywhere at the same time
2923 within any given minute if we can, synchronised via the
2924 perturbation value determined from the boot ID. If we can't,
2925           then we try to find the same spot within every 10s, then every 1s
2926           and finally every 250ms. Otherwise, we pick the last possible time
2927 to wake up.
2928 */
2929
2930 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2931 if (c >= b) {
2932 if (_unlikely_(c < USEC_PER_MINUTE))
2933 return b;
2934
2935 c -= USEC_PER_MINUTE;
2936 }
2937
2938 if (c >= a)
2939 return c;
2940
2941 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2942 if (c >= b) {
2943 if (_unlikely_(c < USEC_PER_SEC*10))
2944 return b;
2945
2946 c -= USEC_PER_SEC*10;
2947 }
2948
2949 if (c >= a)
2950 return c;
2951
2952 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2953 if (c >= b) {
2954 if (_unlikely_(c < USEC_PER_SEC))
2955 return b;
2956
2957 c -= USEC_PER_SEC;
2958 }
2959
2960 if (c >= a)
2961 return c;
2962
2963 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2964 if (c >= b) {
2965 if (_unlikely_(c < USEC_PER_MSEC*250))
2966 return b;
2967
2968 c -= USEC_PER_MSEC*250;
2969 }
2970
2971 if (c >= a)
2972 return c;
2973
2974 return b;
2975 }
2976
2977 static int event_arm_timer(
2978 sd_event *e,
2979 struct clock_data *d) {
2980
2981 struct itimerspec its = {};
2982 sd_event_source *a, *b;
2983 usec_t t;
2984
2985 assert(e);
2986 assert(d);
2987
2988 if (!d->needs_rearm)
2989 return 0;
2990 else
2991 d->needs_rearm = false;
2992
2993 a = prioq_peek(d->earliest);
2994 if (!a || a->enabled == SD_EVENT_OFF || time_event_source_next(a) == USEC_INFINITY) {
2995
2996 if (d->fd < 0)
2997 return 0;
2998
2999 if (d->next == USEC_INFINITY)
3000 return 0;
3001
3002 /* disarm */
3003 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3004 return -errno;
3005
3006 d->next = USEC_INFINITY;
3007 return 0;
3008 }
3009
3010 b = prioq_peek(d->latest);
3011 assert_se(b && b->enabled != SD_EVENT_OFF);
3012
3013 t = sleep_between(e, time_event_source_next(a), time_event_source_latest(b));
3014 if (d->next == t)
3015 return 0;
3016
3017 assert_se(d->fd >= 0);
3018
3019 if (t == 0) {
3020                 /* We don't want to disarm here, we just mean some time looooong ago. */
3021 its.it_value.tv_sec = 0;
3022 its.it_value.tv_nsec = 1;
3023 } else
3024 timespec_store(&its.it_value, t);
3025
3026 if (timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3027 return -errno;
3028
3029 d->next = t;
3030 return 0;
3031 }
3032
3033 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
3034 assert(e);
3035 assert(s);
3036 assert(s->type == SOURCE_IO);
3037
3038 /* If the event source was already pending, we just OR in the
3039 * new revents, otherwise we reset the value. The ORing is
3040 * necessary to handle EPOLLONESHOT events properly where
3041 * readability might happen independently of writability, and
3042 * we need to keep track of both */
3043
3044 if (s->pending)
3045 s->io.revents |= revents;
3046 else
3047 s->io.revents = revents;
3048
3049 return source_set_pending(s, true);
3050 }
3051
3052 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
3053 uint64_t x;
3054 ssize_t ss;
3055
3056 assert(e);
3057 assert(fd >= 0);
3058
3059 assert_return(events == EPOLLIN, -EIO);
3060
3061 ss = read(fd, &x, sizeof(x));
3062 if (ss < 0) {
3063 if (IN_SET(errno, EAGAIN, EINTR))
3064 return 0;
3065
3066 return -errno;
3067 }
3068
3069 if (_unlikely_(ss != sizeof(x)))
3070 return -EIO;
3071
3072 if (next)
3073 *next = USEC_INFINITY;
3074
3075 return 0;
3076 }
3077
3078 static int process_timer(
3079 sd_event *e,
3080 usec_t n,
3081 struct clock_data *d) {
3082
3083 sd_event_source *s;
3084 int r;
3085
3086 assert(e);
3087 assert(d);
3088
3089 for (;;) {
3090 s = prioq_peek(d->earliest);
3091 if (!s || time_event_source_next(s) > n)
3092 break;
3093
3094 if (s->ratelimited) {
3095                         /* This is an event source whose ratelimit window has ended. Let's turn it on
3096 * again. */
3097 assert(s->ratelimited);
3098
3099 r = event_source_leave_ratelimit(s);
3100 if (r < 0)
3101 return r;
3102
3103 continue;
3104 }
3105
3106 if (s->enabled == SD_EVENT_OFF || s->pending)
3107 break;
3108
3109 r = source_set_pending(s, true);
3110 if (r < 0)
3111 return r;
3112
3113 event_source_time_prioq_reshuffle(s);
3114 }
3115
3116 return 0;
3117 }
3118
3119 static int process_child(sd_event *e, int64_t threshold, int64_t *ret_min_priority) {
3120 int64_t min_priority = threshold;
3121 bool something_new = false;
3122 sd_event_source *s;
3123 int r;
3124
3125 assert(e);
3126 assert(ret_min_priority);
3127
3128 if (!e->need_process_child) {
3129 *ret_min_priority = min_priority;
3130 return 0;
3131 }
3132
3133 e->need_process_child = false;
3134
3135 /*
3136 So, this is ugly. We iteratively invoke waitid() with P_PID
3137 + WNOHANG for each PID we wait for, instead of using
3138 P_ALL. This is because we only want to get child
3139 information of very specific child processes, and not all
3140           of them. We might not have processed the SIGCHLD event of a
3141           previous invocation and we don't want to maintain an
3142           unbounded *per-child* event queue, hence we really don't
3143 want anything flushed out of the kernel's queue that we
3144 don't care about. Since this is O(n) this means that if you
3145 have a lot of processes you probably want to handle SIGCHLD
3146 yourself.
3147
3148           We do not reap the children here (by using WNOWAIT); this
3149           is only done after the event source is dispatched so that
3150 the callback still sees the process as a zombie.
3151 */
3152
3153 HASHMAP_FOREACH(s, e->child_sources) {
3154 assert(s->type == SOURCE_CHILD);
3155
3156 if (s->priority > threshold)
3157 continue;
3158
3159 if (s->pending)
3160 continue;
3161
3162 if (event_source_is_offline(s))
3163 continue;
3164
3165 if (s->child.exited)
3166 continue;
3167
3168 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
3169 continue;
3170
3171 zero(s->child.siginfo);
3172 if (waitid(P_PID, s->child.pid, &s->child.siginfo,
3173 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options) < 0)
3174 return negative_errno();
3175
3176 if (s->child.siginfo.si_pid != 0) {
3177 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3178
3179 if (zombie)
3180 s->child.exited = true;
3181
3182 if (!zombie && (s->child.options & WEXITED)) {
3183 /* If the child isn't dead then let's
3184 * immediately remove the state change
3185 * from the queue, since there's no
3186 * benefit in leaving it queued */
3187
3188 assert(s->child.options & (WSTOPPED|WCONTINUED));
3189 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
3190 }
3191
3192 r = source_set_pending(s, true);
3193 if (r < 0)
3194 return r;
3195 if (r > 0) {
3196 something_new = true;
3197 min_priority = MIN(min_priority, s->priority);
3198 }
3199 }
3200 }
3201
3202 *ret_min_priority = min_priority;
3203 return something_new;
3204 }
3205
3206 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
3207 assert(e);
3208 assert(s);
3209 assert(s->type == SOURCE_CHILD);
3210
3211 if (s->pending)
3212 return 0;
3213
3214 if (event_source_is_offline(s))
3215 return 0;
3216
3217 if (!EVENT_SOURCE_WATCH_PIDFD(s))
3218 return 0;
3219
3220 zero(s->child.siginfo);
3221 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
3222 return -errno;
3223
3224 if (s->child.siginfo.si_pid == 0)
3225 return 0;
3226
3227 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
3228 s->child.exited = true;
3229
3230 return source_set_pending(s, true);
3231 }
3232
3233 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events, int64_t *min_priority) {
3234 int r;
3235
3236 assert(e);
3237 assert(d);
3238 assert_return(events == EPOLLIN, -EIO);
3239 assert(min_priority);
3240
3241 /* If there's a signal queued on this priority and SIGCHLD is
3242 on this priority too, then make sure to recheck the
3243 children we watch. This is because we only ever dequeue
3244            the first signal per priority, and if we dequeue one, SIGCHLD
3245            might still be queued behind it without us knowing; but we
3246            might have higher priority children we care about, hence we
3247            need to check that explicitly. */
3248
3249 if (sigismember(&d->sigset, SIGCHLD))
3250 e->need_process_child = true;
3251
3252 /* If there's already an event source pending for this
3253 * priority we don't read another */
3254 if (d->current)
3255 return 0;
3256
3257 for (;;) {
3258 struct signalfd_siginfo si;
3259 ssize_t n;
3260 sd_event_source *s = NULL;
3261
3262 n = read(d->fd, &si, sizeof(si));
3263 if (n < 0) {
3264 if (IN_SET(errno, EAGAIN, EINTR))
3265 return 0;
3266
3267 return -errno;
3268 }
3269
3270 if (_unlikely_(n != sizeof(si)))
3271 return -EIO;
3272
3273 assert(SIGNAL_VALID(si.ssi_signo));
3274
3275 if (e->signal_sources)
3276 s = e->signal_sources[si.ssi_signo];
3277 if (!s)
3278 continue;
3279 if (s->pending)
3280 continue;
3281
3282 s->signal.siginfo = si;
3283 d->current = s;
3284
3285 r = source_set_pending(s, true);
3286 if (r < 0)
3287 return r;
3288 if (r > 0 && *min_priority >= s->priority) {
3289 *min_priority = s->priority;
3290 return 1; /* an event source with smaller priority is queued. */
3291 }
3292
3293 return 0;
3294 }
3295 }
3296
3297 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents, int64_t threshold) {
3298 ssize_t n;
3299
3300 assert(e);
3301 assert(d);
3302
3303 assert_return(revents == EPOLLIN, -EIO);
3304
3305 /* If there's already an event source pending for this priority, don't read another */
3306 if (d->n_pending > 0)
3307 return 0;
3308
3309 /* Is the read buffer non-empty? If so, let's not read more */
3310 if (d->buffer_filled > 0)
3311 return 0;
3312
3313 if (d->priority > threshold)
3314 return 0;
3315
3316 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3317 if (n < 0) {
3318 if (IN_SET(errno, EAGAIN, EINTR))
3319 return 0;
3320
3321 return -errno;
3322 }
3323
3324 assert(n > 0);
3325 d->buffer_filled = (size_t) n;
3326 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3327
3328 return 1;
3329 }
3330
3331 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3332 assert(e);
3333 assert(d);
3334 assert(sz <= d->buffer_filled);
3335
3336 if (sz == 0)
3337 return;
3338
3339         /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3340 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3341 d->buffer_filled -= sz;
3342
3343 if (d->buffer_filled == 0)
3344 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3345 }
3346
3347 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3348 int r;
3349
3350 assert(e);
3351 assert(d);
3352
3353 /* If there's already an event source pending for this priority, don't read another */
3354 if (d->n_pending > 0)
3355 return 0;
3356
3357 while (d->buffer_filled > 0) {
3358 size_t sz;
3359
3360 /* Let's validate that the event structures are complete */
3361 if (d->buffer_filled < offsetof(struct inotify_event, name))
3362 return -EIO;
3363
3364 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3365 if (d->buffer_filled < sz)
3366 return -EIO;
3367
3368 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3369 struct inode_data *inode_data;
3370
3371 /* The queue overran, let's pass this event to all event sources connected to this inotify
3372 * object */
3373
3374 HASHMAP_FOREACH(inode_data, d->inodes) {
3375 sd_event_source *s;
3376
3377 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3378
3379 if (event_source_is_offline(s))
3380 continue;
3381
3382 r = source_set_pending(s, true);
3383 if (r < 0)
3384 return r;
3385 }
3386 }
3387 } else {
3388 struct inode_data *inode_data;
3389 sd_event_source *s;
3390
3391 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3392 * our watch descriptor table. */
3393 if (d->buffer.ev.mask & IN_IGNORED) {
3394
3395 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3396 if (!inode_data) {
3397 event_inotify_data_drop(e, d, sz);
3398 continue;
3399 }
3400
3401 /* The watch descriptor was removed by the kernel, let's drop it here too */
3402 inode_data->wd = -1;
3403 } else {
3404 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3405 if (!inode_data) {
3406 event_inotify_data_drop(e, d, sz);
3407 continue;
3408 }
3409 }
3410
3411 /* Trigger all event sources that are interested in these events. Also trigger all event
3412 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3413 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3414
3415 if (event_source_is_offline(s))
3416 continue;
3417
3418 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3419 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3420 continue;
3421
3422 r = source_set_pending(s, true);
3423 if (r < 0)
3424 return r;
3425 }
3426 }
3427
3428 /* Something pending now? If so, let's finish, otherwise let's read more. */
3429 if (d->n_pending > 0)
3430 return 1;
3431 }
3432
3433 return 0;
3434 }
3435
3436 static int process_inotify(sd_event *e) {
3437 struct inotify_data *d;
3438 int r, done = 0;
3439
3440 assert(e);
3441
3442 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3443 r = event_inotify_data_process(e, d);
3444 if (r < 0)
3445 return r;
3446 if (r > 0)
3447                         done++;
3448 }
3449
3450 return done;
3451 }
3452
3453 static int source_dispatch(sd_event_source *s) {
3454 _cleanup_(sd_event_unrefp) sd_event *saved_event = NULL;
3455 EventSourceType saved_type;
3456 int r = 0;
3457
3458 assert(s);
3459 assert(s->pending || s->type == SOURCE_EXIT);
3460
3461         /* Save the event source type here, so that we still know it after the event callback, which might
3462 * invalidate the event. */
3463 saved_type = s->type;
3464
3465         /* Similarly, store a reference to the event loop object, so that we can still access it after the
3466 * callback might have invalidated/disconnected the event source. */
3467 saved_event = sd_event_ref(s->event);
3468
3469         /* Check if we hit the ratelimit for this event source; if so, take it offline until its ratelimit window ends. */
3470 assert(!s->ratelimited);
3471 if (!ratelimit_below(&s->rate_limit)) {
3472 r = event_source_enter_ratelimited(s);
3473 if (r < 0)
3474 return r;
3475
3476 return 1;
3477 }
3478
3479 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3480 r = source_set_pending(s, false);
3481 if (r < 0)
3482 return r;
3483 }
3484
3485 if (s->type != SOURCE_POST) {
3486 sd_event_source *z;
3487
3488 /* If we execute a non-post source, let's mark all
3489 * post sources as pending */
3490
3491 SET_FOREACH(z, s->event->post_sources) {
3492 if (event_source_is_offline(z))
3493 continue;
3494
3495 r = source_set_pending(z, true);
3496 if (r < 0)
3497 return r;
3498 }
3499 }
3500
3501 if (s->enabled == SD_EVENT_ONESHOT) {
3502 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3503 if (r < 0)
3504 return r;
3505 }
3506
3507 s->dispatching = true;
3508
3509 switch (s->type) {
3510
3511 case SOURCE_IO:
3512 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3513 break;
3514
3515 case SOURCE_TIME_REALTIME:
3516 case SOURCE_TIME_BOOTTIME:
3517 case SOURCE_TIME_MONOTONIC:
3518 case SOURCE_TIME_REALTIME_ALARM:
3519 case SOURCE_TIME_BOOTTIME_ALARM:
3520 r = s->time.callback(s, s->time.next, s->userdata);
3521 break;
3522
3523 case SOURCE_SIGNAL:
3524 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3525 break;
3526
3527 case SOURCE_CHILD: {
3528 bool zombie;
3529
3530 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3531
3532 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3533
3534 /* Now, reap the PID for good. */
3535 if (zombie) {
3536 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3537 s->child.waited = true;
3538 }
3539
3540 break;
3541 }
3542
3543 case SOURCE_DEFER:
3544 r = s->defer.callback(s, s->userdata);
3545 break;
3546
3547 case SOURCE_POST:
3548 r = s->post.callback(s, s->userdata);
3549 break;
3550
3551 case SOURCE_EXIT:
3552 r = s->exit.callback(s, s->userdata);
3553 break;
3554
3555 case SOURCE_INOTIFY: {
3556 struct sd_event *e = s->event;
3557 struct inotify_data *d;
3558 size_t sz;
3559
3560 assert(s->inotify.inode_data);
3561 assert_se(d = s->inotify.inode_data->inotify_data);
3562
3563 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3564 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3565 assert(d->buffer_filled >= sz);
3566
3567 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3568
3569 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3570 * buffer. */
3571 if (d->n_pending == 0)
3572 event_inotify_data_drop(e, d, sz);
3573
3574 break;
3575 }
3576
3577 case SOURCE_WATCHDOG:
3578 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3579 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3580 assert_not_reached("Wut? I shouldn't exist.");
3581 }
3582
3583 s->dispatching = false;
3584
3585 if (r < 0) {
3586 log_debug_errno(r, "Event source %s (type %s) returned error, %s: %m",
3587 strna(s->description),
3588 event_source_type_to_string(saved_type),
3589 s->exit_on_failure ? "exiting" : "disabling");
3590
3591 if (s->exit_on_failure)
3592 (void) sd_event_exit(saved_event, r);
3593 }
3594
3595 if (s->n_ref == 0)
3596 source_free(s);
3597 else if (r < 0)
3598 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3599
3600 return 1;
3601 }
3602
3603 static int event_prepare(sd_event *e) {
3604 int r;
3605
3606 assert(e);
3607
3608 for (;;) {
3609 sd_event_source *s;
3610
3611 s = prioq_peek(e->prepare);
3612 if (!s || s->prepare_iteration == e->iteration || event_source_is_offline(s))
3613 break;
3614
3615 s->prepare_iteration = e->iteration;
3616 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3617 if (r < 0)
3618 return r;
3619
3620 assert(s->prepare);
3621
3622 s->dispatching = true;
3623 r = s->prepare(s, s->userdata);
3624 s->dispatching = false;
3625
3626 if (r < 0) {
3627 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, %s: %m",
3628 strna(s->description),
3629 event_source_type_to_string(s->type),
3630 s->exit_on_failure ? "exiting" : "disabling");
3631
3632 if (s->exit_on_failure)
3633 (void) sd_event_exit(e, r);
3634 }
3635
3636 if (s->n_ref == 0)
3637 source_free(s);
3638 else if (r < 0)
3639 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3640 }
3641
3642 return 0;
3643 }
3644
3645 static int dispatch_exit(sd_event *e) {
3646 sd_event_source *p;
3647 int r;
3648
3649 assert(e);
3650
3651 p = prioq_peek(e->exit);
3652 if (!p || event_source_is_offline(p)) {
3653 e->state = SD_EVENT_FINISHED;
3654 return 0;
3655 }
3656
3657 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3658 e->iteration++;
3659 e->state = SD_EVENT_EXITING;
3660 r = source_dispatch(p);
3661 e->state = SD_EVENT_INITIAL;
3662 return r;
3663 }
3664
3665 static sd_event_source* event_next_pending(sd_event *e) {
3666 sd_event_source *p;
3667
3668 assert(e);
3669
3670 p = prioq_peek(e->pending);
3671 if (!p)
3672 return NULL;
3673
3674 if (event_source_is_offline(p))
3675 return NULL;
3676
3677 return p;
3678 }
3679
3680 static int arm_watchdog(sd_event *e) {
3681 struct itimerspec its = {};
3682 usec_t t;
3683
3684 assert(e);
3685 assert(e->watchdog_fd >= 0);
3686
3687 t = sleep_between(e,
3688 e->watchdog_last + (e->watchdog_period / 2),
3689 e->watchdog_last + (e->watchdog_period * 3 / 4));
3690
3691 timespec_store(&its.it_value, t);
3692
3693 /* Make sure we never set the watchdog to 0, which tells the
3694 * kernel to disable it. */
3695 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3696 its.it_value.tv_nsec = 1;
3697
3698 if (timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL) < 0)
3699 return -errno;
3700
3701 return 0;
3702 }
3703
3704 static int process_watchdog(sd_event *e) {
3705 assert(e);
3706
3707 if (!e->watchdog)
3708 return 0;
3709
3710 /* Don't notify watchdog too often */
3711 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3712 return 0;
3713
3714 sd_notify(false, "WATCHDOG=1");
3715 e->watchdog_last = e->timestamp.monotonic;
3716
3717 return arm_watchdog(e);
3718 }
3719
3720 static void event_close_inode_data_fds(sd_event *e) {
3721 struct inode_data *d;
3722
3723 assert(e);
3724
3725 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3726          * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3727          * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3728 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3729 * compromise. */
3730
3731 while ((d = e->inode_data_to_close)) {
3732 assert(d->fd >= 0);
3733 d->fd = safe_close(d->fd);
3734
3735 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3736 }
3737 }
3738
3739 _public_ int sd_event_prepare(sd_event *e) {
3740 int r;
3741
3742 assert_return(e, -EINVAL);
3743 assert_return(e = event_resolve(e), -ENOPKG);
3744 assert_return(!event_pid_changed(e), -ECHILD);
3745 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3746 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3747
3748 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3749          * this check here once, since gettid() is typically not cached, and we thus want to minimize
3750 * syscalls */
3751 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3752
3753 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
3754 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
3755
3756 if (e->exit_requested)
3757 goto pending;
3758
3759 e->iteration++;
3760
3761 e->state = SD_EVENT_PREPARING;
3762 r = event_prepare(e);
3763 e->state = SD_EVENT_INITIAL;
3764 if (r < 0)
3765 return r;
3766
3767 r = event_arm_timer(e, &e->realtime);
3768 if (r < 0)
3769 return r;
3770
3771 r = event_arm_timer(e, &e->boottime);
3772 if (r < 0)
3773 return r;
3774
3775 r = event_arm_timer(e, &e->monotonic);
3776 if (r < 0)
3777 return r;
3778
3779 r = event_arm_timer(e, &e->realtime_alarm);
3780 if (r < 0)
3781 return r;
3782
3783 r = event_arm_timer(e, &e->boottime_alarm);
3784 if (r < 0)
3785 return r;
3786
3787 event_close_inode_data_fds(e);
3788
3789 if (event_next_pending(e) || e->need_process_child)
3790 goto pending;
3791
3792 e->state = SD_EVENT_ARMED;
3793
3794 return 0;
3795
3796 pending:
3797 e->state = SD_EVENT_ARMED;
3798 r = sd_event_wait(e, 0);
3799 if (r == 0)
3800 e->state = SD_EVENT_ARMED;
3801
3802 return r;
3803 }
3804
3805 static int epoll_wait_usec(
3806 int fd,
3807 struct epoll_event *events,
3808 int maxevents,
3809 usec_t timeout) {
3810
3811 int r, msec;
3812 #if 0
3813 static bool epoll_pwait2_absent = false;
3814
3815 /* A wrapper that uses epoll_pwait2() if available, and falls back to epoll_wait() if not.
3816 *
3817 * FIXME: this is temporarily disabled until epoll_pwait2() becomes more widely available.
3818 * See https://github.com/systemd/systemd/pull/18973 and
3819 * https://github.com/systemd/systemd/issues/19052. */
3820
3821 if (!epoll_pwait2_absent && timeout != USEC_INFINITY) {
3822 struct timespec ts;
3823
3824 r = epoll_pwait2(fd,
3825 events,
3826 maxevents,
3827 timespec_store(&ts, timeout),
3828 NULL);
3829 if (r >= 0)
3830 return r;
3831 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
3832 return -errno; /* Only fallback to old epoll_wait() if the syscall is masked or not
3833 * supported. */
3834
3835 epoll_pwait2_absent = true;
3836 }
3837 #endif
3838
3839 if (timeout == USEC_INFINITY)
3840 msec = -1;
3841 else {
3842 usec_t k;
3843
3844 k = DIV_ROUND_UP(timeout, USEC_PER_MSEC);
3845 if (k >= INT_MAX)
3846 msec = INT_MAX; /* Saturate */
3847 else
3848 msec = (int) k;
3849 }
3850
3851 r = epoll_wait(fd,
3852 events,
3853 maxevents,
3854 msec);
3855 if (r < 0)
3856 return -errno;
3857
3858 return r;
3859 }
3860
3861 static int process_epoll(sd_event *e, usec_t timeout, int64_t threshold, int64_t *ret_min_priority) {
3862 int64_t min_priority = threshold;
3863 bool something_new = false;
3864 size_t n_event_queue, m;
3865 int r;
3866
3867 assert(e);
3868 assert(ret_min_priority);
3869
3870 n_event_queue = MAX(e->n_sources, 1u);
3871 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, n_event_queue))
3872 return -ENOMEM;
3873
3874 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3875 if (e->inotify_data_buffered)
3876 timeout = 0;
3877
3878 for (;;) {
3879 r = epoll_wait_usec(e->epoll_fd, e->event_queue, e->event_queue_allocated, timeout);
3880 if (r < 0)
3881 return r;
3882
3883 m = (size_t) r;
3884
3885 if (m < e->event_queue_allocated)
3886 break;
3887
3888 if (e->event_queue_allocated >= n_event_queue * 10)
3889 break;
3890
3891 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, e->event_queue_allocated + n_event_queue))
3892 return -ENOMEM;
3893
3894 timeout = 0;
3895 }
3896
3897         /* Set timestamp only when this is called for the first time. */
3898 if (threshold == INT64_MAX)
3899 triple_timestamp_get(&e->timestamp);
3900
3901 for (size_t i = 0; i < m; i++) {
3902
3903 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3904 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3905 else {
3906 WakeupType *t = e->event_queue[i].data.ptr;
3907
3908 switch (*t) {
3909
3910 case WAKEUP_EVENT_SOURCE: {
3911 sd_event_source *s = e->event_queue[i].data.ptr;
3912
3913 assert(s);
3914
3915 if (s->priority > threshold)
3916 continue;
3917
3918 min_priority = MIN(min_priority, s->priority);
3919
3920 switch (s->type) {
3921
3922 case SOURCE_IO:
3923 r = process_io(e, s, e->event_queue[i].events);
3924 break;
3925
3926 case SOURCE_CHILD:
3927 r = process_pidfd(e, s, e->event_queue[i].events);
3928 break;
3929
3930 default:
3931 assert_not_reached("Unexpected event source type");
3932 }
3933
3934 break;
3935 }
3936
3937 case WAKEUP_CLOCK_DATA: {
3938 struct clock_data *d = e->event_queue[i].data.ptr;
3939
3940 assert(d);
3941
3942 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3943 break;
3944 }
3945
3946 case WAKEUP_SIGNAL_DATA:
3947 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events, &min_priority);
3948 break;
3949
3950 case WAKEUP_INOTIFY_DATA:
3951 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events, threshold);
3952 break;
3953
3954 default:
3955 assert_not_reached("Invalid wake-up pointer");
3956 }
3957 }
3958 if (r < 0)
3959 return r;
3960 if (r > 0)
3961 something_new = true;
3962 }
3963
3964 *ret_min_priority = min_priority;
3965 return something_new;
3966 }
3967
3968 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3969 int r;
3970
3971 assert_return(e, -EINVAL);
3972 assert_return(e = event_resolve(e), -ENOPKG);
3973 assert_return(!event_pid_changed(e), -ECHILD);
3974 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3975 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3976
3977 if (e->exit_requested) {
3978 e->state = SD_EVENT_PENDING;
3979 return 1;
3980 }
3981
3982 for (int64_t threshold = INT64_MAX; ; threshold--) {
3983 int64_t epoll_min_priority, child_min_priority;
3984
3985                 /* It is possible that new epoll (especially IO) and child events are
3986                  * triggered just after the process_epoll() call but before process_child(), and the new IO
3987                  * events may have higher priority than the child events. To salvage these events,
3988                  * let's call epoll_wait() again, but accept only events with higher priority than the
3989 * previous. See issue https://github.com/systemd/systemd/issues/18190 and comments
3990 * https://github.com/systemd/systemd/pull/18750#issuecomment-785801085
3991 * https://github.com/systemd/systemd/pull/18922#issuecomment-792825226 */
3992
3993 r = process_epoll(e, timeout, threshold, &epoll_min_priority);
3994 if (r == -EINTR) {
3995 e->state = SD_EVENT_PENDING;
3996 return 1;
3997 }
3998 if (r < 0)
3999 goto finish;
4000 if (r == 0 && threshold < INT64_MAX)
4001 /* No new epoll event. */
4002 break;
4003
4004 r = process_child(e, threshold, &child_min_priority);
4005 if (r < 0)
4006 goto finish;
4007 if (r == 0)
4008 /* No new child event. */
4009 break;
4010
4011 threshold = MIN(epoll_min_priority, child_min_priority);
4012 if (threshold == INT64_MIN)
4013 break;
4014
4015 timeout = 0;
4016 }
4017
4018 r = process_watchdog(e);
4019 if (r < 0)
4020 goto finish;
4021
4022 r = process_timer(e, e->timestamp.realtime, &e->realtime);
4023 if (r < 0)
4024 goto finish;
4025
4026 r = process_timer(e, e->timestamp.boottime, &e->boottime);
4027 if (r < 0)
4028 goto finish;
4029
4030 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
4031 if (r < 0)
4032 goto finish;
4033
4034 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
4035 if (r < 0)
4036 goto finish;
4037
4038 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
4039 if (r < 0)
4040 goto finish;
4041
4042 r = process_inotify(e);
4043 if (r < 0)
4044 goto finish;
4045
4046 if (event_next_pending(e)) {
4047 e->state = SD_EVENT_PENDING;
4048 return 1;
4049 }
4050
4051 r = 0;
4052
4053 finish:
4054 e->state = SD_EVENT_INITIAL;
4055
4056 return r;
4057 }
4058
4059 _public_ int sd_event_dispatch(sd_event *e) {
4060 sd_event_source *p;
4061 int r;
4062
4063 assert_return(e, -EINVAL);
4064 assert_return(e = event_resolve(e), -ENOPKG);
4065 assert_return(!event_pid_changed(e), -ECHILD);
4066 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4067 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
4068
4069 if (e->exit_requested)
4070 return dispatch_exit(e);
4071
4072 p = event_next_pending(e);
4073 if (p) {
4074 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
4075
4076 e->state = SD_EVENT_RUNNING;
4077 r = source_dispatch(p);
4078 e->state = SD_EVENT_INITIAL;
4079 return r;
4080 }
4081
4082 e->state = SD_EVENT_INITIAL;
4083
4084 return 1;
4085 }
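
/* A minimal usage sketch (illustration only, not part of this file): the split
 * prepare/wait/dispatch cycle that sd_event_run() below wraps, useful when integrating
 * sd-event into a foreign main loop that polls the fd returned by sd_event_get_fd().
 * The helper name is hypothetical. */
#include <systemd/sd-event.h>

static int run_one_iteration(sd_event *e, uint64_t timeout_usec) {
        int r;

        r = sd_event_prepare(e);          /* 0 = nothing pending yet, loop is now ARMED */
        if (r == 0)
                r = sd_event_wait(e, timeout_usec);
        if (r <= 0)
                return r;                 /* error, or timeout with nothing to dispatch */

        return sd_event_dispatch(e);      /* dispatches exactly one pending source */
}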
4086
4087 static void event_log_delays(sd_event *e) {
4088 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
4089 size_t l, i;
4090
4091 p = b;
4092 l = sizeof(b);
4093 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
4094 l = strpcpyf(&p, l, "%u ", e->delays[i]);
4095 e->delays[i] = 0;
4096 }
4097 log_debug("Event loop iterations: %s", b);
4098 }
4099
4100 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
4101 int r;
4102
4103 assert_return(e, -EINVAL);
4104 assert_return(e = event_resolve(e), -ENOPKG);
4105 assert_return(!event_pid_changed(e), -ECHILD);
4106 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4107 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4108
4109 if (e->profile_delays && e->last_run_usec != 0) {
4110 usec_t this_run;
4111 unsigned l;
4112
4113 this_run = now(CLOCK_MONOTONIC);
4114
4115 l = u64log2(this_run - e->last_run_usec);
4116 assert(l < ELEMENTSOF(e->delays));
4117 e->delays[l]++;
4118
4119 if (this_run - e->last_log_usec >= 5*USEC_PER_SEC) {
4120 event_log_delays(e);
4121 e->last_log_usec = this_run;
4122 }
4123 }
4124
4125 /* Make sure that none of the preparation callbacks ends up freeing the event source under our feet */
4126 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = sd_event_ref(e);
4127
4128 r = sd_event_prepare(e);
4129 if (r == 0)
4130 /* There was nothing? Then wait... */
4131 r = sd_event_wait(e, timeout);
4132
4133 if (e->profile_delays)
4134 e->last_run_usec = now(CLOCK_MONOTONIC);
4135
4136 if (r > 0) {
4137 /* There's something now, then let's dispatch it */
4138 r = sd_event_dispatch(e);
4139 if (r < 0)
4140 return r;
4141
4142 return 1;
4143 }
4144
4145 return r;
4146 }
4147
4148 _public_ int sd_event_loop(sd_event *e) {
4149 int r;
4150
4151 assert_return(e, -EINVAL);
4152 assert_return(e = event_resolve(e), -ENOPKG);
4153 assert_return(!event_pid_changed(e), -ECHILD);
4154 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
4155
4156 _unused_ _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
4157
4158 while (e->state != SD_EVENT_FINISHED) {
4159 r = sd_event_run(e, UINT64_MAX);
4160 if (r < 0)
4161 return r;
4162 }
4163
4164 return e->exit_code;
4165 }
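
/* A minimal usage sketch (illustration only, not part of this file): a complete small
 * consumer of the loop API above. SIGTERM is blocked before adding the signal source, as
 * required for signalfd-based signal sources; sd_event_exit() makes sd_event_loop() return
 * the given exit code. */
#include <signal.h>
#include <sys/signalfd.h>
#include <systemd/sd-event.h>

static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        sd_event *e = NULL;
        sigset_t mask;
        int r;

        sigemptyset(&mask);
        sigaddset(&mask, SIGTERM);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
        if (r < 0) {
                sd_event_unref(e);
                return 1;
        }

        r = sd_event_loop(e);
        sd_event_unref(e);
        return r < 0;
}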
4166
4167 _public_ int sd_event_get_fd(sd_event *e) {
4168 assert_return(e, -EINVAL);
4169 assert_return(e = event_resolve(e), -ENOPKG);
4170 assert_return(!event_pid_changed(e), -ECHILD);
4171
4172 return e->epoll_fd;
4173 }
4174
4175 _public_ int sd_event_get_state(sd_event *e) {
4176 assert_return(e, -EINVAL);
4177 assert_return(e = event_resolve(e), -ENOPKG);
4178 assert_return(!event_pid_changed(e), -ECHILD);
4179
4180 return e->state;
4181 }
4182
4183 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
4184 assert_return(e, -EINVAL);
4185 assert_return(e = event_resolve(e), -ENOPKG);
4186 assert_return(code, -EINVAL);
4187 assert_return(!event_pid_changed(e), -ECHILD);
4188
4189 if (!e->exit_requested)
4190 return -ENODATA;
4191
4192 *code = e->exit_code;
4193 return 0;
4194 }
4195
4196 _public_ int sd_event_exit(sd_event *e, int code) {
4197 assert_return(e, -EINVAL);
4198 assert_return(e = event_resolve(e), -ENOPKG);
4199 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
4200 assert_return(!event_pid_changed(e), -ECHILD);
4201
4202 e->exit_requested = true;
4203 e->exit_code = code;
4204
4205 return 0;
4206 }
4207
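/* Illustrative sketch: requesting termination from inside a handler. sd_event_exit() only marks the loop
 * for exit; sources registered with sd_event_add_exit() are still dispatched before sd_event_loop()
 * returns the given code. The handler name is hypothetical. */
#if 0
static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}
#endif
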
4208 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
4209 assert_return(e, -EINVAL);
4210 assert_return(e = event_resolve(e), -ENOPKG);
4211 assert_return(usec, -EINVAL);
4212 assert_return(!event_pid_changed(e), -ECHILD);
4213
4214 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
4215 return -EOPNOTSUPP;
4216
4217 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
4218  * here, for a reason: there are systems where CLOCK_BOOTTIME is supported but CLOCK_BOOTTIME_ALARM is not;
4219  * for the purpose of getting the time this doesn't matter. */
4220 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
4221 return -EOPNOTSUPP;
4222
4223 if (!triple_timestamp_is_set(&e->timestamp)) {
4224 /* Implicitly fall back to now() if we never ran before and thus have no cached time. */
4225 *usec = now(clock);
4226 return 1;
4227 }
4228
4229 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
4230 return 0;
4231 }
4232
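/* Illustrative sketch: sd_event_now() is the preferred way to compute relative timeouts, since it returns
 * the timestamp cached at the start of the current iteration (return value 0) and only falls back to a
 * fresh reading before the first iteration (return value 1). Names are hypothetical. */
#if 0
static int arm_relative_timer(sd_event *e, sd_event_time_handler_t handler, void *userdata) {
        uint64_t t;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &t);
        if (r < 0)
                return r;

        /* Fire 5s from "now"; accuracy 0 selects the default accuracy. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC, t + 5 * USEC_PER_SEC, 0, handler, userdata);
}
#endif
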
4233 _public_ int sd_event_default(sd_event **ret) {
4234 sd_event *e = NULL;
4235 int r;
4236
4237 if (!ret)
4238 return !!default_event;
4239
4240 if (default_event) {
4241 *ret = sd_event_ref(default_event);
4242 return 0;
4243 }
4244
4245 r = sd_event_new(&e);
4246 if (r < 0)
4247 return r;
4248
4249 e->default_event_ptr = &default_event;
4250 e->tid = gettid();
4251 default_event = e;
4252
4253 *ret = e;
4254 return 1;
4255 }
4256
4257 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
4258 assert_return(e, -EINVAL);
4259 assert_return(e = event_resolve(e), -ENOPKG);
4260 assert_return(tid, -EINVAL);
4261 assert_return(!event_pid_changed(e), -ECHILD);
4262
4263 if (e->tid != 0) {
4264 *tid = e->tid;
4265 return 0;
4266 }
4267
4268 return -ENXIO;
4269 }
4270
4271 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
4272 int r;
4273
4274 assert_return(e, -EINVAL);
4275 assert_return(e = event_resolve(e), -ENOPKG);
4276 assert_return(!event_pid_changed(e), -ECHILD);
4277
4278 if (e->watchdog == !!b)
4279 return e->watchdog;
4280
4281 if (b) {
4282 r = sd_watchdog_enabled(false, &e->watchdog_period);
4283 if (r <= 0)
4284 return r;
4285
4286 /* Issue first ping immediately */
4287 sd_notify(false, "WATCHDOG=1");
4288 e->watchdog_last = now(CLOCK_MONOTONIC);
4289
4290 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
4291 if (e->watchdog_fd < 0)
4292 return -errno;
4293
4294 r = arm_watchdog(e);
4295 if (r < 0)
4296 goto fail;
4297
4298 struct epoll_event ev = {
4299 .events = EPOLLIN,
4300 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
4301 };
4302
4303 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev) < 0) {
4304 r = -errno;
4305 goto fail;
4306 }
4307
4308 } else {
4309 if (e->watchdog_fd >= 0) {
4310 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
4311 e->watchdog_fd = safe_close(e->watchdog_fd);
4312 }
4313 }
4314
4315 e->watchdog = !!b;
4316 return e->watchdog;
4317
4318 fail:
4319 e->watchdog_fd = safe_close(e->watchdog_fd);
4320 return r;
4321 }
4322
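/* Illustrative sketch: enabling watchdog support. This only has an effect when the service manager armed a
 * watchdog for the service (e.g. WatchdogSec= in the unit file); otherwise sd_watchdog_enabled() reports 0
 * and the call is a no-op returning 0. While enabled, the loop pings the manager with "WATCHDOG=1"
 * notifications from its watchdog timer. */
#if 0
static int enable_watchdog_if_requested(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;

        /* r > 0: watchdog pings are now sent automatically as part of the event loop. */
        return r;
}
#endif
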
4323 _public_ int sd_event_get_watchdog(sd_event *e) {
4324 assert_return(e, -EINVAL);
4325 assert_return(e = event_resolve(e), -ENOPKG);
4326 assert_return(!event_pid_changed(e), -ECHILD);
4327
4328 return e->watchdog;
4329 }
4330
4331 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
4332 assert_return(e, -EINVAL);
4333 assert_return(e = event_resolve(e), -ENOPKG);
4334 assert_return(!event_pid_changed(e), -ECHILD);
4335
4336 *ret = e->iteration;
4337 return 0;
4338 }
4339
4340 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
4341 assert_return(s, -EINVAL);
4342
4343 s->destroy_callback = callback;
4344 return 0;
4345 }
4346
4347 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
4348 assert_return(s, -EINVAL);
4349
4350 if (ret)
4351 *ret = s->destroy_callback;
4352
4353 return !!s->destroy_callback;
4354 }
4355
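/* Illustrative sketch: a destroy callback is typically used to hand ownership of the userdata to the event
 * source, so that it is freed whenever the source goes away. On failure, ownership of 'owned' stays with
 * the caller. Names are hypothetical. */
#if 0
static int add_owned_io_source(sd_event *e, sd_event_source **ret, int fd, sd_event_io_handler_t handler, char *owned) {
        _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
        int r;

        r = sd_event_add_io(e, &s, fd, EPOLLIN, handler, owned);
        if (r < 0)
                return r;

        /* From now on the source free()s 'owned' when it is destroyed. */
        r = sd_event_source_set_destroy_callback(s, free);
        if (r < 0)
                return r;

        *ret = s;
        s = NULL; /* Don't drop the reference we are returning. */
        return 0;
}
#endif
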
4356 _public_ int sd_event_source_get_floating(sd_event_source *s) {
4357 assert_return(s, -EINVAL);
4358
4359 return s->floating;
4360 }
4361
4362 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
4363 assert_return(s, -EINVAL);
4364
4365 if (s->floating == !!b)
4366 return 0;
4367
4368 if (!s->event) /* Already disconnected */
4369 return -ESTALE;
4370
4371 s->floating = b;
4372
4373 if (b) {
4374 sd_event_source_ref(s);
4375 sd_event_unref(s->event);
4376 } else {
4377 sd_event_ref(s->event);
4378 sd_event_source_unref(s);
4379 }
4380
4381 return 1;
4382 }
4383
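/* Illustrative sketch: a floating source is owned by the event loop instead of the caller, which is handy
 * for fire-and-forget sources. Passing NULL as the source return pointer to the sd_event_add_*() calls has
 * the same effect. Names are hypothetical. */
#if 0
static int add_fire_and_forget_timer(sd_event *e, uint64_t usec, sd_event_time_handler_t handler) {
        _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
        int r;

        r = sd_event_add_time(e, &s, CLOCK_MONOTONIC, usec, 0, handler, NULL);
        if (r < 0)
                return r;

        /* The loop takes its own reference; ours is dropped by the cleanup handler, leaving the source
         * alive for as long as the loop is. */
        return sd_event_source_set_floating(s, true);
}
#endif
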
4384 _public_ int sd_event_source_get_exit_on_failure(sd_event_source *s) {
4385 assert_return(s, -EINVAL);
4386 assert_return(s->type != SOURCE_EXIT, -EDOM);
4387
4388 return s->exit_on_failure;
4389 }
4390
4391 _public_ int sd_event_source_set_exit_on_failure(sd_event_source *s, int b) {
4392 assert_return(s, -EINVAL);
4393 assert_return(s->type != SOURCE_EXIT, -EDOM);
4394
4395 if (s->exit_on_failure == !!b)
4396 return 0;
4397
4398 s->exit_on_failure = b;
4399 return 1;
4400 }
4401
4402 _public_ int sd_event_source_set_ratelimit(sd_event_source *s, uint64_t interval, unsigned burst) {
4403 int r;
4404
4405 assert_return(s, -EINVAL);
4406
4407 /* Turning on ratelimiting on event source types that don't support it is a loggable offense: doing
4408  * so is a programming error. */
4409 assert_return(EVENT_SOURCE_CAN_RATE_LIMIT(s->type), -EDOM);
4410
4411 /* When ratelimiting is configured we'll always reset the rate limit state first and start fresh,
4412 * non-ratelimited. */
4413 r = event_source_leave_ratelimit(s);
4414 if (r < 0)
4415 return r;
4416
4417 s->rate_limit = (RateLimit) { interval, burst };
4418 return 0;
4419 }
4420
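/* Illustrative sketch: rate-limiting a busy source, here to at most 10 dispatches per second. Once the
 * burst is used up within the interval the source is temporarily taken offline and resumed automatically
 * when the interval elapses. The function name is hypothetical. */
#if 0
static int limit_source(sd_event_source *s) {
        return sd_event_source_set_ratelimit(s, 1 * USEC_PER_SEC, 10); /* interval in µs, burst count */
}
#endif
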
4421 _public_ int sd_event_source_get_ratelimit(sd_event_source *s, uint64_t *ret_interval, unsigned *ret_burst) {
4422 assert_return(s, -EINVAL);
4423
4424 /* Querying whether an event source has ratelimiting configured is not a loggable offense, hence
4425  * don't use assert_return(). Unlike turning on ratelimiting, it's not really a programming error. */
4426 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4427 return -EDOM;
4428
4429 if (!ratelimit_configured(&s->rate_limit))
4430 return -ENOEXEC;
4431
4432 if (ret_interval)
4433 *ret_interval = s->rate_limit.interval;
4434 if (ret_burst)
4435 *ret_burst = s->rate_limit.burst;
4436
4437 return 0;
4438 }
4439
4440 _public_ int sd_event_source_is_ratelimited(sd_event_source *s) {
4441 assert_return(s, -EINVAL);
4442
4443 if (!EVENT_SOURCE_CAN_RATE_LIMIT(s->type))
4444 return false;
4445
4446 if (!ratelimit_configured(&s->rate_limit))
4447 return false;
4448
4449 return s->ratelimited;
4450 }