1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
39
40 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43 [SOURCE_TIME_BOOTTIME] = "boottime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
53 [SOURCE_INOTIFY] = "inotify",
54 };
55
56 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
60 struct sd_event {
61 unsigned n_ref;
62
63 int epoll_fd;
64 int watchdog_fd;
65
66 Prioq *pending;
67 Prioq *prepare;
68
69 /* timerfd_create() only supports these five clocks so far. We
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
73 struct clock_data boottime;
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
77
78 usec_t perturb;
79
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
82
83 Hashmap *child_sources;
84 unsigned n_enabled_child_sources;
85
86 Set *post_sources;
87
88 Prioq *exit;
89
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
98 pid_t original_pid;
99
100 uint64_t iteration;
101 triple_timestamp timestamp;
102 int state;
103
104 bool exit_requested:1;
105 bool need_process_child:1;
106 bool watchdog:1;
107 bool profile_delays:1;
108
109 int exit_code;
110
111 pid_t tid;
112 sd_event **default_event_ptr;
113
114 usec_t watchdog_last, watchdog_period;
115
116 unsigned n_sources;
117
118 struct epoll_event *event_queue;
119 size_t event_queue_allocated;
120
121 LIST_HEAD(sd_event_source, sources);
122
123 usec_t last_run, last_log;
124 unsigned delays[sizeof(usec_t) * 8];
125 };
126
127 static thread_local sd_event *default_event = NULL;
128
129 static void source_disconnect(sd_event_source *s);
130 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
131
132 static sd_event *event_resolve(sd_event *e) {
133 return e == SD_EVENT_DEFAULT ? default_event : e;
134 }
135
136 static int pending_prioq_compare(const void *a, const void *b) {
137 const sd_event_source *x = a, *y = b;
138 int r;
139
140 assert(x->pending);
141 assert(y->pending);
142
143 /* Enabled ones first */
144 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
145 return -1;
146 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
147 return 1;
148
149 /* Lower priority values first */
150 r = CMP(x->priority, y->priority);
151 if (r != 0)
152 return r;
153
154 /* Older entries first */
155 return CMP(x->pending_iteration, y->pending_iteration);
156 }
157
158 static int prepare_prioq_compare(const void *a, const void *b) {
159 const sd_event_source *x = a, *y = b;
160 int r;
161
162 assert(x->prepare);
163 assert(y->prepare);
164
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
167 return -1;
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
169 return 1;
170
171 /* Move most recently prepared ones last, so that we can stop
172 * preparing as soon as we hit one that has already been
173 * prepared in the current iteration */
174 r = CMP(x->prepare_iteration, y->prepare_iteration);
175 if (r != 0)
176 return r;
177
178 /* Lower priority values first */
179 return CMP(x->priority, y->priority);
180 }
181
182 static int earliest_time_prioq_compare(const void *a, const void *b) {
183 const sd_event_source *x = a, *y = b;
184
185 assert(EVENT_SOURCE_IS_TIME(x->type));
186 assert(x->type == y->type);
187
188 /* Enabled ones first */
189 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190 return -1;
191 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192 return 1;
193
194 /* Move the pending ones to the end */
195 if (!x->pending && y->pending)
196 return -1;
197 if (x->pending && !y->pending)
198 return 1;
199
200 /* Order by time */
201 return CMP(x->time.next, y->time.next);
202 }
203
204 static usec_t time_event_source_latest(const sd_event_source *s) {
205 return usec_add(s->time.next, s->time.accuracy);
206 }
207
208 static int latest_time_prioq_compare(const void *a, const void *b) {
209 const sd_event_source *x = a, *y = b;
210
211 assert(EVENT_SOURCE_IS_TIME(x->type));
212 assert(x->type == y->type);
213
214 /* Enabled ones first */
215 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
216 return -1;
217 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
218 return 1;
219
220 /* Move the pending ones to the end */
221 if (!x->pending && y->pending)
222 return -1;
223 if (x->pending && !y->pending)
224 return 1;
225
226 /* Order by time */
227 return CMP(time_event_source_latest(x), time_event_source_latest(y));
228 }
229
230 static int exit_prioq_compare(const void *a, const void *b) {
231 const sd_event_source *x = a, *y = b;
232
233 assert(x->type == SOURCE_EXIT);
234 assert(y->type == SOURCE_EXIT);
235
236 /* Enabled ones first */
237 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
238 return -1;
239 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
240 return 1;
241
242 /* Lower priority values first */
243 return CMP(x->priority, y->priority);
244 }
245
246 static void free_clock_data(struct clock_data *d) {
247 assert(d);
248 assert(d->wakeup == WAKEUP_CLOCK_DATA);
249
250 safe_close(d->fd);
251 prioq_free(d->earliest);
252 prioq_free(d->latest);
253 }
254
255 static sd_event *event_free(sd_event *e) {
256 sd_event_source *s;
257
258 assert(e);
259
260 while ((s = e->sources)) {
261 assert(s->floating);
262 source_disconnect(s);
263 sd_event_source_unref(s);
264 }
265
266 assert(e->n_sources == 0);
267
268 if (e->default_event_ptr)
269 *(e->default_event_ptr) = NULL;
270
271 safe_close(e->epoll_fd);
272 safe_close(e->watchdog_fd);
273
274 free_clock_data(&e->realtime);
275 free_clock_data(&e->boottime);
276 free_clock_data(&e->monotonic);
277 free_clock_data(&e->realtime_alarm);
278 free_clock_data(&e->boottime_alarm);
279
280 prioq_free(e->pending);
281 prioq_free(e->prepare);
282 prioq_free(e->exit);
283
284 free(e->signal_sources);
285 hashmap_free(e->signal_data);
286
287 hashmap_free(e->inotify_data);
288
289 hashmap_free(e->child_sources);
290 set_free(e->post_sources);
291
292 free(e->event_queue);
293
294 return mfree(e);
295 }
296
297 _public_ int sd_event_new(sd_event** ret) {
298 sd_event *e;
299 int r;
300
301 assert_return(ret, -EINVAL);
302
303 e = new(sd_event, 1);
304 if (!e)
305 return -ENOMEM;
306
307 *e = (sd_event) {
308 .n_ref = 1,
309 .epoll_fd = -1,
310 .watchdog_fd = -1,
311 .realtime.wakeup = WAKEUP_CLOCK_DATA,
312 .realtime.fd = -1,
313 .realtime.next = USEC_INFINITY,
314 .boottime.wakeup = WAKEUP_CLOCK_DATA,
315 .boottime.fd = -1,
316 .boottime.next = USEC_INFINITY,
317 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
318 .monotonic.fd = -1,
319 .monotonic.next = USEC_INFINITY,
320 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
321 .realtime_alarm.fd = -1,
322 .realtime_alarm.next = USEC_INFINITY,
323 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
324 .boottime_alarm.fd = -1,
325 .boottime_alarm.next = USEC_INFINITY,
326 .perturb = USEC_INFINITY,
327 .original_pid = getpid_cached(),
328 };
329
330 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
331 if (r < 0)
332 goto fail;
333
334 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
335 if (e->epoll_fd < 0) {
336 r = -errno;
337 goto fail;
338 }
339
340 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
341
342 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
343 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
344 e->profile_delays = true;
345 }
346
347 *ret = e;
348 return 0;
349
350 fail:
351 event_free(e);
352 return r;
353 }
354
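/* A minimal usage sketch (illustrative only, not part of this file): allocate an event loop with
 * sd_event_new(), run it with sd_event_loop(), and drop the reference when done. Error handling is
 * abbreviated.
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *
 *     // ... attach event sources here ...
 *
 *     r = sd_event_loop(e);   // dispatches events until sd_event_exit() is called
 *     sd_event_unref(e);
 *     return r;
 */
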
355 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
356
357 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
358 if (s)
359 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
360 return sd_event_source_unref(s);
361 }
362
363 static bool event_pid_changed(sd_event *e) {
364 assert(e);
365
366 /* We don't support people creating an event loop and keeping
367 * it around over a fork(). Let's complain. */
368
369 return e->original_pid != getpid_cached();
370 }
371
372 static void source_io_unregister(sd_event_source *s) {
373 assert(s);
374 assert(s->type == SOURCE_IO);
375
376 if (event_pid_changed(s->event))
377 return;
378
379 if (!s->io.registered)
380 return;
381
382 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
383 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
384 strna(s->description), event_source_type_to_string(s->type));
385
386 s->io.registered = false;
387 }
388
389 static int source_io_register(
390 sd_event_source *s,
391 int enabled,
392 uint32_t events) {
393
394 struct epoll_event ev;
395 int r;
396
397 assert(s);
398 assert(s->type == SOURCE_IO);
399 assert(enabled != SD_EVENT_OFF);
400
401 ev = (struct epoll_event) {
402 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
403 .data.ptr = s,
404 };
405
406 if (s->io.registered)
407 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
408 else
409 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
410 if (r < 0)
411 return -errno;
412
413 s->io.registered = true;
414
415 return 0;
416 }
417
418 static void source_child_pidfd_unregister(sd_event_source *s) {
419 assert(s);
420 assert(s->type == SOURCE_CHILD);
421
422 if (event_pid_changed(s->event))
423 return;
424
425 if (!s->child.registered)
426 return;
427
428 if (EVENT_SOURCE_WATCH_PIDFD(s))
429 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
430 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
431 strna(s->description), event_source_type_to_string(s->type));
432
433 s->child.registered = false;
434 }
435
436 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
437 int r;
438
439 assert(s);
440 assert(s->type == SOURCE_CHILD);
441 assert(enabled != SD_EVENT_OFF);
442
443 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
444 struct epoll_event ev;
445
446 ev = (struct epoll_event) {
447 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
448 .data.ptr = s,
449 };
450
451 if (s->child.registered)
452 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
453 else
454 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
455 if (r < 0)
456 return -errno;
457 }
458
459 s->child.registered = true;
460 return 0;
461 }
462
463 static clockid_t event_source_type_to_clock(EventSourceType t) {
464
465 switch (t) {
466
467 case SOURCE_TIME_REALTIME:
468 return CLOCK_REALTIME;
469
470 case SOURCE_TIME_BOOTTIME:
471 return CLOCK_BOOTTIME;
472
473 case SOURCE_TIME_MONOTONIC:
474 return CLOCK_MONOTONIC;
475
476 case SOURCE_TIME_REALTIME_ALARM:
477 return CLOCK_REALTIME_ALARM;
478
479 case SOURCE_TIME_BOOTTIME_ALARM:
480 return CLOCK_BOOTTIME_ALARM;
481
482 default:
483 return (clockid_t) -1;
484 }
485 }
486
487 static EventSourceType clock_to_event_source_type(clockid_t clock) {
488
489 switch (clock) {
490
491 case CLOCK_REALTIME:
492 return SOURCE_TIME_REALTIME;
493
494 case CLOCK_BOOTTIME:
495 return SOURCE_TIME_BOOTTIME;
496
497 case CLOCK_MONOTONIC:
498 return SOURCE_TIME_MONOTONIC;
499
500 case CLOCK_REALTIME_ALARM:
501 return SOURCE_TIME_REALTIME_ALARM;
502
503 case CLOCK_BOOTTIME_ALARM:
504 return SOURCE_TIME_BOOTTIME_ALARM;
505
506 default:
507 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
508 }
509 }
510
511 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
512 assert(e);
513
514 switch (t) {
515
516 case SOURCE_TIME_REALTIME:
517 return &e->realtime;
518
519 case SOURCE_TIME_BOOTTIME:
520 return &e->boottime;
521
522 case SOURCE_TIME_MONOTONIC:
523 return &e->monotonic;
524
525 case SOURCE_TIME_REALTIME_ALARM:
526 return &e->realtime_alarm;
527
528 case SOURCE_TIME_BOOTTIME_ALARM:
529 return &e->boottime_alarm;
530
531 default:
532 return NULL;
533 }
534 }
535
536 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
537 assert(e);
538
539 if (!d)
540 return;
541
542 hashmap_remove(e->signal_data, &d->priority);
543 safe_close(d->fd);
544 free(d);
545 }
546
547 static int event_make_signal_data(
548 sd_event *e,
549 int sig,
550 struct signal_data **ret) {
551
552 struct epoll_event ev;
553 struct signal_data *d;
554 bool added = false;
555 sigset_t ss_copy;
556 int64_t priority;
557 int r;
558
559 assert(e);
560
561 if (event_pid_changed(e))
562 return -ECHILD;
563
564 if (e->signal_sources && e->signal_sources[sig])
565 priority = e->signal_sources[sig]->priority;
566 else
567 priority = SD_EVENT_PRIORITY_NORMAL;
568
569 d = hashmap_get(e->signal_data, &priority);
570 if (d) {
571 if (sigismember(&d->sigset, sig) > 0) {
572 if (ret)
573 *ret = d;
574 return 0;
575 }
576 } else {
577 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
578 if (r < 0)
579 return r;
580
581 d = new(struct signal_data, 1);
582 if (!d)
583 return -ENOMEM;
584
585 *d = (struct signal_data) {
586 .wakeup = WAKEUP_SIGNAL_DATA,
587 .fd = -1,
588 .priority = priority,
589 };
590
591 r = hashmap_put(e->signal_data, &d->priority, d);
592 if (r < 0) {
593 free(d);
594 return r;
595 }
596
597 added = true;
598 }
599
600 ss_copy = d->sigset;
601 assert_se(sigaddset(&ss_copy, sig) >= 0);
602
603 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
604 if (r < 0) {
605 r = -errno;
606 goto fail;
607 }
608
609 d->sigset = ss_copy;
610
611 if (d->fd >= 0) {
612 if (ret)
613 *ret = d;
614 return 0;
615 }
616
617 d->fd = fd_move_above_stdio(r);
618
619 ev = (struct epoll_event) {
620 .events = EPOLLIN,
621 .data.ptr = d,
622 };
623
624 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
625 if (r < 0) {
626 r = -errno;
627 goto fail;
628 }
629
630 if (ret)
631 *ret = d;
632
633 return 0;
634
635 fail:
636 if (added)
637 event_free_signal_data(e, d);
638
639 return r;
640 }
641
642 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
643 assert(e);
644 assert(d);
645
646 /* Turns off the specified signal in the signal data
647 * object. If the signal mask of the object becomes empty
648 * that way, the object is removed. */
649
650 if (sigismember(&d->sigset, sig) == 0)
651 return;
652
653 assert_se(sigdelset(&d->sigset, sig) >= 0);
654
655 if (sigisemptyset(&d->sigset)) {
656 /* If the mask is now all-zero we can get rid of the structure */
657 event_free_signal_data(e, d);
658 return;
659 }
660
661 assert(d->fd >= 0);
662
663 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
664 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
665 }
666
667 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
668 struct signal_data *d;
669 static const int64_t zero_priority = 0;
670
671 assert(e);
672
673 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
674 * and possibly drop the signalfd for it. */
675
676 if (sig == SIGCHLD &&
677 e->n_enabled_child_sources > 0)
678 return;
679
680 if (e->signal_sources &&
681 e->signal_sources[sig] &&
682 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
683 return;
684
685 /*
686 * The specified signal might be enabled in three different queues:
687 *
688 * 1) the one that belongs to the priority passed (if it is non-NULL)
689 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
690 * 3) the 0 priority (to cover the SIGCHLD case)
691 *
692 * Hence, let's remove it from all three here.
693 */
694
695 if (priority) {
696 d = hashmap_get(e->signal_data, priority);
697 if (d)
698 event_unmask_signal_data(e, d, sig);
699 }
700
701 if (e->signal_sources && e->signal_sources[sig]) {
702 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
703 if (d)
704 event_unmask_signal_data(e, d, sig);
705 }
706
707 d = hashmap_get(e->signal_data, &zero_priority);
708 if (d)
709 event_unmask_signal_data(e, d, sig);
710 }
711
712 static void source_disconnect(sd_event_source *s) {
713 sd_event *event;
714
715 assert(s);
716
717 if (!s->event)
718 return;
719
720 assert(s->event->n_sources > 0);
721
722 switch (s->type) {
723
724 case SOURCE_IO:
725 if (s->io.fd >= 0)
726 source_io_unregister(s);
727
728 break;
729
730 case SOURCE_TIME_REALTIME:
731 case SOURCE_TIME_BOOTTIME:
732 case SOURCE_TIME_MONOTONIC:
733 case SOURCE_TIME_REALTIME_ALARM:
734 case SOURCE_TIME_BOOTTIME_ALARM: {
735 struct clock_data *d;
736
737 d = event_get_clock_data(s->event, s->type);
738 assert(d);
739
740 prioq_remove(d->earliest, s, &s->time.earliest_index);
741 prioq_remove(d->latest, s, &s->time.latest_index);
742 d->needs_rearm = true;
743 break;
744 }
745
746 case SOURCE_SIGNAL:
747 if (s->signal.sig > 0) {
748
749 if (s->event->signal_sources)
750 s->event->signal_sources[s->signal.sig] = NULL;
751
752 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
753 }
754
755 break;
756
757 case SOURCE_CHILD:
758 if (s->child.pid > 0) {
759 if (s->enabled != SD_EVENT_OFF) {
760 assert(s->event->n_enabled_child_sources > 0);
761 s->event->n_enabled_child_sources--;
762 }
763
764 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
765 }
766
767 if (EVENT_SOURCE_WATCH_PIDFD(s))
768 source_child_pidfd_unregister(s);
769 else
770 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
771
772 break;
773
774 case SOURCE_DEFER:
775 /* nothing */
776 break;
777
778 case SOURCE_POST:
779 set_remove(s->event->post_sources, s);
780 break;
781
782 case SOURCE_EXIT:
783 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
784 break;
785
786 case SOURCE_INOTIFY: {
787 struct inode_data *inode_data;
788
789 inode_data = s->inotify.inode_data;
790 if (inode_data) {
791 struct inotify_data *inotify_data;
792 assert_se(inotify_data = inode_data->inotify_data);
793
794 /* Detach this event source from the inode object */
795 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
796 s->inotify.inode_data = NULL;
797
798 if (s->pending) {
799 assert(inotify_data->n_pending > 0);
800 inotify_data->n_pending--;
801 }
802
803 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
804 * continues to be watched. That's because inotify doesn't really have an API for that: we
805 * can only change watch masks with access to the original inode either by fd or by path. But
806 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
807 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
808 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
809 * there), but given the need for open_by_handle_at() which is privileged and not universally
810 * available this would be quite an incomplete solution. Hence we go the other way, leave the
811 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
812 * anymore after reception. Yes, this sucks, but … Linux … */
813
814 /* Maybe release the inode data (and its inotify) */
815 event_gc_inode_data(s->event, inode_data);
816 }
817
818 break;
819 }
820
821 default:
822 assert_not_reached("Wut? I shouldn't exist.");
823 }
824
825 if (s->pending)
826 prioq_remove(s->event->pending, s, &s->pending_index);
827
828 if (s->prepare)
829 prioq_remove(s->event->prepare, s, &s->prepare_index);
830
831 event = TAKE_PTR(s->event);
832 LIST_REMOVE(sources, event->sources, s);
833 event->n_sources--;
834
835 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
836 * pidfd associated with this event source, which we'll do only on source_free(). */
837
838 if (!s->floating)
839 sd_event_unref(event);
840 }
841
842 static void source_free(sd_event_source *s) {
843 assert(s);
844
845 source_disconnect(s);
846
847 if (s->type == SOURCE_IO && s->io.owned)
848 s->io.fd = safe_close(s->io.fd);
849
850 if (s->type == SOURCE_CHILD) {
851 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
852
853 if (s->child.process_owned) {
854
855 if (!s->child.exited) {
856 bool sent = false;
857
858 if (s->child.pidfd >= 0) {
859 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
860 if (errno == ESRCH) /* Already dead */
861 sent = true;
862 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
863 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
864 s->child.pid);
865 } else
866 sent = true;
867 }
868
869 if (!sent)
870 if (kill(s->child.pid, SIGKILL) < 0)
871 if (errno != ESRCH) /* Already dead */
872 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
873 s->child.pid);
874 }
875
876 if (!s->child.waited) {
877 siginfo_t si = {};
878
879 /* Reap the child if we can */
880 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
881 }
882 }
883
884 if (s->child.pidfd_owned)
885 s->child.pidfd = safe_close(s->child.pidfd);
886 }
887
888 if (s->destroy_callback)
889 s->destroy_callback(s->userdata);
890
891 free(s->description);
892 free(s);
893 }
894 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
895
896 static int source_set_pending(sd_event_source *s, bool b) {
897 int r;
898
899 assert(s);
900 assert(s->type != SOURCE_EXIT);
901
902 if (s->pending == b)
903 return 0;
904
905 s->pending = b;
906
907 if (b) {
908 s->pending_iteration = s->event->iteration;
909
910 r = prioq_put(s->event->pending, s, &s->pending_index);
911 if (r < 0) {
912 s->pending = false;
913 return r;
914 }
915 } else
916 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
917
918 if (EVENT_SOURCE_IS_TIME(s->type)) {
919 struct clock_data *d;
920
921 d = event_get_clock_data(s->event, s->type);
922 assert(d);
923
924 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
925 prioq_reshuffle(d->latest, s, &s->time.latest_index);
926 d->needs_rearm = true;
927 }
928
929 if (s->type == SOURCE_SIGNAL && !b) {
930 struct signal_data *d;
931
932 d = hashmap_get(s->event->signal_data, &s->priority);
933 if (d && d->current == s)
934 d->current = NULL;
935 }
936
937 if (s->type == SOURCE_INOTIFY) {
938
939 assert(s->inotify.inode_data);
940 assert(s->inotify.inode_data->inotify_data);
941
942 if (b)
943 s->inotify.inode_data->inotify_data->n_pending++;
944 else {
945 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
946 s->inotify.inode_data->inotify_data->n_pending--;
947 }
948 }
949
950 return 0;
951 }
952
953 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
954 sd_event_source *s;
955
956 assert(e);
957
958 s = new(sd_event_source, 1);
959 if (!s)
960 return NULL;
961
962 *s = (struct sd_event_source) {
963 .n_ref = 1,
964 .event = e,
965 .floating = floating,
966 .type = type,
967 .pending_index = PRIOQ_IDX_NULL,
968 .prepare_index = PRIOQ_IDX_NULL,
969 };
970
971 if (!floating)
972 sd_event_ref(e);
973
974 LIST_PREPEND(sources, e->sources, s);
975 e->n_sources++;
976
977 return s;
978 }
979
980 _public_ int sd_event_add_io(
981 sd_event *e,
982 sd_event_source **ret,
983 int fd,
984 uint32_t events,
985 sd_event_io_handler_t callback,
986 void *userdata) {
987
988 _cleanup_(source_freep) sd_event_source *s = NULL;
989 int r;
990
991 assert_return(e, -EINVAL);
992 assert_return(e = event_resolve(e), -ENOPKG);
993 assert_return(fd >= 0, -EBADF);
994 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
995 assert_return(callback, -EINVAL);
996 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
997 assert_return(!event_pid_changed(e), -ECHILD);
998
999 s = source_new(e, !ret, SOURCE_IO);
1000 if (!s)
1001 return -ENOMEM;
1002
1003 s->wakeup = WAKEUP_EVENT_SOURCE;
1004 s->io.fd = fd;
1005 s->io.events = events;
1006 s->io.callback = callback;
1007 s->userdata = userdata;
1008 s->enabled = SD_EVENT_ON;
1009
1010 r = source_io_register(s, s->enabled, events);
1011 if (r < 0)
1012 return r;
1013
1014 if (ret)
1015 *ret = s;
1016 TAKE_PTR(s);
1017
1018 return 0;
1019 }
1020
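/* Illustrative sketch (not part of this file): registering an I/O event source for a readable fd.
 * The handler signature matches sd_event_io_handler_t as used above; the fd and handle_io() names
 * are hypothetical.
 *
 *     static int handle_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     // read from fd ...
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, handle_io, NULL);
 *     if (r < 0)
 *             return r;
 */
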
1021 static void initialize_perturb(sd_event *e) {
1022 sd_id128_t bootid = {};
1023
1024 /* When we sleep for longer, we try to realign the wakeup to
1025 the same time within each minute/second/250ms, so that
1026 events all across the system can be coalesced into a single
1027 CPU wakeup. However, let's take some system-specific
1028 randomness for this value, so that in a network of systems
1029 with synced clocks timer events are distributed a
1030 bit. Here, we calculate a perturbation usec offset from the
1031 boot ID. */
1032
1033 if (_likely_(e->perturb != USEC_INFINITY))
1034 return;
1035
1036 if (sd_id128_get_boot(&bootid) >= 0)
1037 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1038 }
1039
1040 static int event_setup_timer_fd(
1041 sd_event *e,
1042 struct clock_data *d,
1043 clockid_t clock) {
1044
1045 struct epoll_event ev;
1046 int r, fd;
1047
1048 assert(e);
1049 assert(d);
1050
1051 if (_likely_(d->fd >= 0))
1052 return 0;
1053
1054 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1055 if (fd < 0)
1056 return -errno;
1057
1058 fd = fd_move_above_stdio(fd);
1059
1060 ev = (struct epoll_event) {
1061 .events = EPOLLIN,
1062 .data.ptr = d,
1063 };
1064
1065 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1066 if (r < 0) {
1067 safe_close(fd);
1068 return -errno;
1069 }
1070
1071 d->fd = fd;
1072 return 0;
1073 }
1074
1075 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1076 assert(s);
1077
1078 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1079 }
1080
1081 _public_ int sd_event_add_time(
1082 sd_event *e,
1083 sd_event_source **ret,
1084 clockid_t clock,
1085 uint64_t usec,
1086 uint64_t accuracy,
1087 sd_event_time_handler_t callback,
1088 void *userdata) {
1089
1090 EventSourceType type;
1091 _cleanup_(source_freep) sd_event_source *s = NULL;
1092 struct clock_data *d;
1093 int r;
1094
1095 assert_return(e, -EINVAL);
1096 assert_return(e = event_resolve(e), -ENOPKG);
1097 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1098 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1099 assert_return(!event_pid_changed(e), -ECHILD);
1100
1101 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1102 return -EOPNOTSUPP;
1103
1104 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1105 if (type < 0)
1106 return -EOPNOTSUPP;
1107
1108 if (!callback)
1109 callback = time_exit_callback;
1110
1111 d = event_get_clock_data(e, type);
1112 assert(d);
1113
1114 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1115 if (r < 0)
1116 return r;
1117
1118 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1119 if (r < 0)
1120 return r;
1121
1122 if (d->fd < 0) {
1123 r = event_setup_timer_fd(e, d, clock);
1124 if (r < 0)
1125 return r;
1126 }
1127
1128 s = source_new(e, !ret, type);
1129 if (!s)
1130 return -ENOMEM;
1131
1132 s->time.next = usec;
1133 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1134 s->time.callback = callback;
1135 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1136 s->userdata = userdata;
1137 s->enabled = SD_EVENT_ONESHOT;
1138
1139 d->needs_rearm = true;
1140
1141 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1142 if (r < 0)
1143 return r;
1144
1145 r = prioq_put(d->latest, s, &s->time.latest_index);
1146 if (r < 0)
1147 return r;
1148
1149 if (ret)
1150 *ret = s;
1151 TAKE_PTR(s);
1152
1153 return 0;
1154 }
1155
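/* Illustrative sketch (not part of this file): arming a one-shot timer 5s from now on CLOCK_MONOTONIC,
 * using the public sd_event_now() helper to obtain the loop's view of the current time. handle_timer()
 * is a hypothetical callback; passing 0 as accuracy selects DEFAULT_ACCURACY_USEC, as shown above.
 *
 *     static int handle_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             // timer elapsed ...
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           now_usec + 5 * USEC_PER_SEC, 0, handle_timer, NULL);
 *     if (r < 0)
 *             return r;
 */
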
1156 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1157 assert(s);
1158
1159 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1160 }
1161
1162 _public_ int sd_event_add_signal(
1163 sd_event *e,
1164 sd_event_source **ret,
1165 int sig,
1166 sd_event_signal_handler_t callback,
1167 void *userdata) {
1168
1169 _cleanup_(source_freep) sd_event_source *s = NULL;
1170 struct signal_data *d;
1171 int r;
1172
1173 assert_return(e, -EINVAL);
1174 assert_return(e = event_resolve(e), -ENOPKG);
1175 assert_return(SIGNAL_VALID(sig), -EINVAL);
1176 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1177 assert_return(!event_pid_changed(e), -ECHILD);
1178
1179 if (!callback)
1180 callback = signal_exit_callback;
1181
1182 r = signal_is_blocked(sig);
1183 if (r < 0)
1184 return r;
1185 if (r == 0)
1186 return -EBUSY;
1187
1188 if (!e->signal_sources) {
1189 e->signal_sources = new0(sd_event_source*, _NSIG);
1190 if (!e->signal_sources)
1191 return -ENOMEM;
1192 } else if (e->signal_sources[sig])
1193 return -EBUSY;
1194
1195 s = source_new(e, !ret, SOURCE_SIGNAL);
1196 if (!s)
1197 return -ENOMEM;
1198
1199 s->signal.sig = sig;
1200 s->signal.callback = callback;
1201 s->userdata = userdata;
1202 s->enabled = SD_EVENT_ON;
1203
1204 e->signal_sources[sig] = s;
1205
1206 r = event_make_signal_data(e, sig, &d);
1207 if (r < 0)
1208 return r;
1209
1210 /* Use the signal name as description for the event source by default */
1211 (void) sd_event_source_set_description(s, signal_to_string(sig));
1212
1213 if (ret)
1214 *ret = s;
1215 TAKE_PTR(s);
1216
1217 return 0;
1218 }
1219
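/* Illustrative sketch (not part of this file): watching SIGTERM. As enforced by the signal_is_blocked()
 * check above, the signal must be blocked (e.g. with sigprocmask()) before the source is added;
 * handle_sigterm() is a hypothetical callback.
 *
 *     static int handle_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, handle_sigterm, NULL);
 *     if (r < 0)
 *             return r;
 */
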
1220 static bool shall_use_pidfd(void) {
1221 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1222 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1223 }
1224
1225 _public_ int sd_event_add_child(
1226 sd_event *e,
1227 sd_event_source **ret,
1228 pid_t pid,
1229 int options,
1230 sd_event_child_handler_t callback,
1231 void *userdata) {
1232
1233 _cleanup_(source_freep) sd_event_source *s = NULL;
1234 int r;
1235
1236 assert_return(e, -EINVAL);
1237 assert_return(e = event_resolve(e), -ENOPKG);
1238 assert_return(pid > 1, -EINVAL);
1239 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1240 assert_return(options != 0, -EINVAL);
1241 assert_return(callback, -EINVAL);
1242 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1243 assert_return(!event_pid_changed(e), -ECHILD);
1244
1245 if (e->n_enabled_child_sources == 0) {
1246 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1247 * for compatibility with pre-pidfd and because we don't want to reap the child processes
1248 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1249 * take effect.
1250 *
1251 * (As an optimization we only do this check on the first child event source created.) */
1252 r = signal_is_blocked(SIGCHLD);
1253 if (r < 0)
1254 return r;
1255 if (r == 0)
1256 return -EBUSY;
1257 }
1258
1259 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1260 if (r < 0)
1261 return r;
1262
1263 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1264 return -EBUSY;
1265
1266 s = source_new(e, !ret, SOURCE_CHILD);
1267 if (!s)
1268 return -ENOMEM;
1269
1270 s->wakeup = WAKEUP_EVENT_SOURCE;
1271 s->child.pid = pid;
1272 s->child.options = options;
1273 s->child.callback = callback;
1274 s->userdata = userdata;
1275 s->enabled = SD_EVENT_ONESHOT;
1276
1277 /* We always take a pidfd here if we can, even if we wait for something other than WEXITED, so that we
1278 * pin the PID, and make regular waitid() handling race-free. */
1279
1280 if (shall_use_pidfd()) {
1281 s->child.pidfd = pidfd_open(s->child.pid, 0);
1282 if (s->child.pidfd < 0) {
1283 /* Propagate errors unless the syscall is not supported or blocked */
1284 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1285 return -errno;
1286 } else
1287 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1288 } else
1289 s->child.pidfd = -1;
1290
1291 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1292 if (r < 0)
1293 return r;
1294
1295 e->n_enabled_child_sources++;
1296
1297 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1298 /* We have a pidfd and we only want to watch for exit */
1299
1300 r = source_child_pidfd_register(s, s->enabled);
1301 if (r < 0) {
1302 e->n_enabled_child_sources--;
1303 return r;
1304 }
1305 } else {
1306 /* We have no pidfd or we shall wait for some event other than WEXITED */
1307
1308 r = event_make_signal_data(e, SIGCHLD, NULL);
1309 if (r < 0) {
1310 e->n_enabled_child_sources--;
1311 return r;
1312 }
1313
1314 e->need_process_child = true;
1315 }
1316
1317 if (ret)
1318 *ret = s;
1319
1320 TAKE_PTR(s);
1321 return 0;
1322 }
1323
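/* Illustrative sketch (not part of this file): watching a forked child for exit. SIGCHLD must be
 * blocked first (see the check above), and WEXITED selects exit notification; handle_child() is a
 * hypothetical callback receiving the siginfo_t of the child.
 *
 *     static int handle_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_status carries the exit status when si->si_code == CLD_EXITED
 *             return 0;
 *     }
 *
 *     pid_t pid = fork();
 *     if (pid == 0) {
 *             // child: do work, then _exit()
 *             _exit(EXIT_SUCCESS);
 *     }
 *
 *     r = sd_event_add_child(e, &source, pid, WEXITED, handle_child, NULL);
 *     if (r < 0)
 *             return r;
 */
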
1324 _public_ int sd_event_add_child_pidfd(
1325 sd_event *e,
1326 sd_event_source **ret,
1327 int pidfd,
1328 int options,
1329 sd_event_child_handler_t callback,
1330 void *userdata) {
1331
1332
1333 _cleanup_(source_freep) sd_event_source *s = NULL;
1334 pid_t pid;
1335 int r;
1336
1337 assert_return(e, -EINVAL);
1338 assert_return(e = event_resolve(e), -ENOPKG);
1339 assert_return(pidfd >= 0, -EBADF);
1340 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1341 assert_return(options != 0, -EINVAL);
1342 assert_return(callback, -EINVAL);
1343 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1344 assert_return(!event_pid_changed(e), -ECHILD);
1345
1346 if (e->n_enabled_child_sources == 0) {
1347 r = signal_is_blocked(SIGCHLD);
1348 if (r < 0)
1349 return r;
1350 if (r == 0)
1351 return -EBUSY;
1352 }
1353
1354 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1355 if (r < 0)
1356 return r;
1357
1358 r = pidfd_get_pid(pidfd, &pid);
1359 if (r < 0)
1360 return r;
1361
1362 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1363 return -EBUSY;
1364
1365 s = source_new(e, !ret, SOURCE_CHILD);
1366 if (!s)
1367 return -ENOMEM;
1368
1369 s->wakeup = WAKEUP_EVENT_SOURCE;
1370 s->child.pidfd = pidfd;
1371 s->child.pid = pid;
1372 s->child.options = options;
1373 s->child.callback = callback;
1374 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1375 s->userdata = userdata;
1376 s->enabled = SD_EVENT_ONESHOT;
1377
1378 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1379 if (r < 0)
1380 return r;
1381
1382 e->n_enabled_child_sources++;
1383
1384 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1385 /* We only want to watch for WEXITED */
1386
1387 r = source_child_pidfd_register(s, s->enabled);
1388 if (r < 0) {
1389 e->n_enabled_child_sources--;
1390 return r;
1391 }
1392 } else {
1393 /* We shall wait for some event other than WEXITED */
1394
1395 r = event_make_signal_data(e, SIGCHLD, NULL);
1396 if (r < 0) {
1397 e->n_enabled_child_sources--;
1398 return r;
1399 }
1400
1401 e->need_process_child = true;
1402 }
1403
1404 if (ret)
1405 *ret = s;
1406
1407 TAKE_PTR(s);
1408 return 0;
1409 }
1410
1411 _public_ int sd_event_add_defer(
1412 sd_event *e,
1413 sd_event_source **ret,
1414 sd_event_handler_t callback,
1415 void *userdata) {
1416
1417 _cleanup_(source_freep) sd_event_source *s = NULL;
1418 int r;
1419
1420 assert_return(e, -EINVAL);
1421 assert_return(e = event_resolve(e), -ENOPKG);
1422 assert_return(callback, -EINVAL);
1423 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1424 assert_return(!event_pid_changed(e), -ECHILD);
1425
1426 s = source_new(e, !ret, SOURCE_DEFER);
1427 if (!s)
1428 return -ENOMEM;
1429
1430 s->defer.callback = callback;
1431 s->userdata = userdata;
1432 s->enabled = SD_EVENT_ONESHOT;
1433
1434 r = source_set_pending(s, true);
1435 if (r < 0)
1436 return r;
1437
1438 if (ret)
1439 *ret = s;
1440 TAKE_PTR(s);
1441
1442 return 0;
1443 }
1444
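/* Illustrative sketch (not part of this file): a defer source starts out pending and, being
 * SD_EVENT_ONESHOT by default, runs exactly once on the next loop iteration; run_once() is a
 * hypothetical callback.
 *
 *     static int run_once(sd_event_source *s, void *userdata) {
 *             // executed on the next iteration of the event loop
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, run_once, NULL);
 *     if (r < 0)
 *             return r;
 */
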
1445 _public_ int sd_event_add_post(
1446 sd_event *e,
1447 sd_event_source **ret,
1448 sd_event_handler_t callback,
1449 void *userdata) {
1450
1451 _cleanup_(source_freep) sd_event_source *s = NULL;
1452 int r;
1453
1454 assert_return(e, -EINVAL);
1455 assert_return(e = event_resolve(e), -ENOPKG);
1456 assert_return(callback, -EINVAL);
1457 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1458 assert_return(!event_pid_changed(e), -ECHILD);
1459
1460 r = set_ensure_allocated(&e->post_sources, NULL);
1461 if (r < 0)
1462 return r;
1463
1464 s = source_new(e, !ret, SOURCE_POST);
1465 if (!s)
1466 return -ENOMEM;
1467
1468 s->post.callback = callback;
1469 s->userdata = userdata;
1470 s->enabled = SD_EVENT_ON;
1471
1472 r = set_put(e->post_sources, s);
1473 if (r < 0)
1474 return r;
1475
1476 if (ret)
1477 *ret = s;
1478 TAKE_PTR(s);
1479
1480 return 0;
1481 }
1482
1483 _public_ int sd_event_add_exit(
1484 sd_event *e,
1485 sd_event_source **ret,
1486 sd_event_handler_t callback,
1487 void *userdata) {
1488
1489 _cleanup_(source_freep) sd_event_source *s = NULL;
1490 int r;
1491
1492 assert_return(e, -EINVAL);
1493 assert_return(e = event_resolve(e), -ENOPKG);
1494 assert_return(callback, -EINVAL);
1495 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1496 assert_return(!event_pid_changed(e), -ECHILD);
1497
1498 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1499 if (r < 0)
1500 return r;
1501
1502 s = source_new(e, !ret, SOURCE_EXIT);
1503 if (!s)
1504 return -ENOMEM;
1505
1506 s->exit.callback = callback;
1507 s->userdata = userdata;
1508 s->exit.prioq_index = PRIOQ_IDX_NULL;
1509 s->enabled = SD_EVENT_ONESHOT;
1510
1511 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1512 if (r < 0)
1513 return r;
1514
1515 if (ret)
1516 *ret = s;
1517 TAKE_PTR(s);
1518
1519 return 0;
1520 }
1521
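/* Illustrative sketch (not part of this file): exit sources run only once the loop has been asked to
 * terminate via sd_event_exit(), in priority order, which makes them suitable for cleanup work;
 * cleanup() is a hypothetical callback.
 *
 *     static int cleanup(sd_event_source *s, void *userdata) {
 *             // release resources before sd_event_loop() returns
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, cleanup, NULL);
 *     if (r < 0)
 *             return r;
 */
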
1522 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1523 assert(e);
1524
1525 if (!d)
1526 return;
1527
1528 assert(hashmap_isempty(d->inodes));
1529 assert(hashmap_isempty(d->wd));
1530
1531 if (d->buffer_filled > 0)
1532 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1533
1534 hashmap_free(d->inodes);
1535 hashmap_free(d->wd);
1536
1537 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1538
1539 if (d->fd >= 0) {
1540 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1541 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1542
1543 safe_close(d->fd);
1544 }
1545 free(d);
1546 }
1547
1548 static int event_make_inotify_data(
1549 sd_event *e,
1550 int64_t priority,
1551 struct inotify_data **ret) {
1552
1553 _cleanup_close_ int fd = -1;
1554 struct inotify_data *d;
1555 struct epoll_event ev;
1556 int r;
1557
1558 assert(e);
1559
1560 d = hashmap_get(e->inotify_data, &priority);
1561 if (d) {
1562 if (ret)
1563 *ret = d;
1564 return 0;
1565 }
1566
1567 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1568 if (fd < 0)
1569 return -errno;
1570
1571 fd = fd_move_above_stdio(fd);
1572
1573 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1574 if (r < 0)
1575 return r;
1576
1577 d = new(struct inotify_data, 1);
1578 if (!d)
1579 return -ENOMEM;
1580
1581 *d = (struct inotify_data) {
1582 .wakeup = WAKEUP_INOTIFY_DATA,
1583 .fd = TAKE_FD(fd),
1584 .priority = priority,
1585 };
1586
1587 r = hashmap_put(e->inotify_data, &d->priority, d);
1588 if (r < 0) {
1589 d->fd = safe_close(d->fd);
1590 free(d);
1591 return r;
1592 }
1593
1594 ev = (struct epoll_event) {
1595 .events = EPOLLIN,
1596 .data.ptr = d,
1597 };
1598
1599 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1600 r = -errno;
1601 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1602 * remove the fd from the epoll first, which we don't want as we couldn't
1603 * add it in the first place. */
1604 event_free_inotify_data(e, d);
1605 return r;
1606 }
1607
1608 if (ret)
1609 *ret = d;
1610
1611 return 1;
1612 }
1613
1614 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1615 int r;
1616
1617 assert(x);
1618 assert(y);
1619
1620 r = CMP(x->dev, y->dev);
1621 if (r != 0)
1622 return r;
1623
1624 return CMP(x->ino, y->ino);
1625 }
1626
1627 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1628 assert(d);
1629
1630 siphash24_compress(&d->dev, sizeof(d->dev), state);
1631 siphash24_compress(&d->ino, sizeof(d->ino), state);
1632 }
1633
1634 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1635
1636 static void event_free_inode_data(
1637 sd_event *e,
1638 struct inode_data *d) {
1639
1640 assert(e);
1641
1642 if (!d)
1643 return;
1644
1645 assert(!d->event_sources);
1646
1647 if (d->fd >= 0) {
1648 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1649 safe_close(d->fd);
1650 }
1651
1652 if (d->inotify_data) {
1653
1654 if (d->wd >= 0) {
1655 if (d->inotify_data->fd >= 0) {
1656 /* So here's a problem. At the time this runs the watch descriptor might already be
1657 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1658 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1659 * likely case to happen. */
1660
1661 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1662 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1663 }
1664
1665 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1666 }
1667
1668 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1669 }
1670
1671 free(d);
1672 }
1673
1674 static void event_gc_inode_data(
1675 sd_event *e,
1676 struct inode_data *d) {
1677
1678 struct inotify_data *inotify_data;
1679
1680 assert(e);
1681
1682 if (!d)
1683 return;
1684
1685 if (d->event_sources)
1686 return;
1687
1688 inotify_data = d->inotify_data;
1689 event_free_inode_data(e, d);
1690
1691 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1692 event_free_inotify_data(e, inotify_data);
1693 }
1694
1695 static int event_make_inode_data(
1696 sd_event *e,
1697 struct inotify_data *inotify_data,
1698 dev_t dev,
1699 ino_t ino,
1700 struct inode_data **ret) {
1701
1702 struct inode_data *d, key;
1703 int r;
1704
1705 assert(e);
1706 assert(inotify_data);
1707
1708 key = (struct inode_data) {
1709 .ino = ino,
1710 .dev = dev,
1711 };
1712
1713 d = hashmap_get(inotify_data->inodes, &key);
1714 if (d) {
1715 if (ret)
1716 *ret = d;
1717
1718 return 0;
1719 }
1720
1721 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1722 if (r < 0)
1723 return r;
1724
1725 d = new(struct inode_data, 1);
1726 if (!d)
1727 return -ENOMEM;
1728
1729 *d = (struct inode_data) {
1730 .dev = dev,
1731 .ino = ino,
1732 .wd = -1,
1733 .fd = -1,
1734 .inotify_data = inotify_data,
1735 };
1736
1737 r = hashmap_put(inotify_data->inodes, d, d);
1738 if (r < 0) {
1739 free(d);
1740 return r;
1741 }
1742
1743 if (ret)
1744 *ret = d;
1745
1746 return 1;
1747 }
1748
1749 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1750 bool excl_unlink = true;
1751 uint32_t combined = 0;
1752 sd_event_source *s;
1753
1754 assert(d);
1755
1756 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1757 * the IN_EXCL_UNLINK flag is ANDed instead.
1758 *
1759 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1760 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1761 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1762 * events we don't care for client-side. */
1763
1764 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1765
1766 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1767 excl_unlink = false;
1768
1769 combined |= s->inotify.mask;
1770 }
1771
1772 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1773 }
1774
1775 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1776 uint32_t combined_mask;
1777 int wd, r;
1778
1779 assert(d);
1780 assert(d->fd >= 0);
1781
1782 combined_mask = inode_data_determine_mask(d);
1783
1784 if (d->wd >= 0 && combined_mask == d->combined_mask)
1785 return 0;
1786
1787 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1788 if (r < 0)
1789 return r;
1790
1791 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1792 if (wd < 0)
1793 return -errno;
1794
1795 if (d->wd < 0) {
1796 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1797 if (r < 0) {
1798 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1799 return r;
1800 }
1801
1802 d->wd = wd;
1803
1804 } else if (d->wd != wd) {
1805
1806 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1807 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1808 return -EINVAL;
1809 }
1810
1811 d->combined_mask = combined_mask;
1812 return 1;
1813 }
1814
1815 _public_ int sd_event_add_inotify(
1816 sd_event *e,
1817 sd_event_source **ret,
1818 const char *path,
1819 uint32_t mask,
1820 sd_event_inotify_handler_t callback,
1821 void *userdata) {
1822
1823 struct inotify_data *inotify_data = NULL;
1824 struct inode_data *inode_data = NULL;
1825 _cleanup_close_ int fd = -1;
1826 _cleanup_(source_freep) sd_event_source *s = NULL;
1827 struct stat st;
1828 int r;
1829
1830 assert_return(e, -EINVAL);
1831 assert_return(e = event_resolve(e), -ENOPKG);
1832 assert_return(path, -EINVAL);
1833 assert_return(callback, -EINVAL);
1834 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1835 assert_return(!event_pid_changed(e), -ECHILD);
1836
1837 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1838 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1839 * the caller cannot use them directly. */
1840 if (mask & IN_MASK_ADD)
1841 return -EINVAL;
1842
1843 fd = open(path, O_PATH|O_CLOEXEC|
1844 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1845 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1846 if (fd < 0)
1847 return -errno;
1848
1849 if (fstat(fd, &st) < 0)
1850 return -errno;
1851
1852 s = source_new(e, !ret, SOURCE_INOTIFY);
1853 if (!s)
1854 return -ENOMEM;
1855
1856 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1857 s->inotify.mask = mask;
1858 s->inotify.callback = callback;
1859 s->userdata = userdata;
1860
1861 /* Allocate an inotify object for this priority, and an inode object within it */
1862 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1863 if (r < 0)
1864 return r;
1865
1866 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1867 if (r < 0) {
1868 event_free_inotify_data(e, inotify_data);
1869 return r;
1870 }
1871
1872 /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the event source
1873 * can still be changed until then; for that we need access to the original inode. */
1874 if (inode_data->fd < 0) {
1875 inode_data->fd = TAKE_FD(fd);
1876 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1877 }
1878
1879 /* Link our event source to the inode data object */
1880 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1881 s->inotify.inode_data = inode_data;
1882
1883 /* Actually realize the watch now */
1884 r = inode_data_realize_watch(e, inode_data);
1885 if (r < 0)
1886 return r;
1887
1888 (void) sd_event_source_set_description(s, path);
1889
1890 if (ret)
1891 *ret = s;
1892 TAKE_PTR(s);
1893
1894 return 0;
1895 }
1896
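/* Illustrative sketch (not part of this file): watching a directory for completed writes. Watches on
 * the same inode are coalesced per priority as described above; the path and handle_inotify() names
 * are hypothetical.
 *
 *     static int handle_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
 *             if (event->len > 0)
 *                     log_debug("File %s was closed after writing.", event->name);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &source, "/run/mydir", IN_CLOSE_WRITE, handle_inotify, NULL);
 *     if (r < 0)
 *             return r;
 */
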
1897 static sd_event_source* event_source_free(sd_event_source *s) {
1898 if (!s)
1899 return NULL;
1900
1901 /* Here's a special hack: when we are called from a
1902 * dispatch handler we won't free the event source
1903 * immediately, but we will detach the fd from the
1904 * epoll. This way it is safe for the caller to unref
1905 * the event source and immediately close the fd, but
1906 * we still retain a valid event source object after
1907 * the callback. */
1908
1909 if (s->dispatching) {
1910 if (s->type == SOURCE_IO)
1911 source_io_unregister(s);
1912
1913 source_disconnect(s);
1914 } else
1915 source_free(s);
1916
1917 return NULL;
1918 }
1919
1920 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1921
1922 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1923 assert_return(s, -EINVAL);
1924 assert_return(!event_pid_changed(s->event), -ECHILD);
1925
1926 return free_and_strdup(&s->description, description);
1927 }
1928
1929 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1930 assert_return(s, -EINVAL);
1931 assert_return(description, -EINVAL);
1932 assert_return(!event_pid_changed(s->event), -ECHILD);
1933
1934 if (!s->description)
1935 return -ENXIO;
1936
1937 *description = s->description;
1938 return 0;
1939 }
1940
1941 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1942 assert_return(s, NULL);
1943
1944 return s->event;
1945 }
1946
1947 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1948 assert_return(s, -EINVAL);
1949 assert_return(s->type != SOURCE_EXIT, -EDOM);
1950 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1951 assert_return(!event_pid_changed(s->event), -ECHILD);
1952
1953 return s->pending;
1954 }
1955
1956 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1957 assert_return(s, -EINVAL);
1958 assert_return(s->type == SOURCE_IO, -EDOM);
1959 assert_return(!event_pid_changed(s->event), -ECHILD);
1960
1961 return s->io.fd;
1962 }
1963
1964 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1965 int r;
1966
1967 assert_return(s, -EINVAL);
1968 assert_return(fd >= 0, -EBADF);
1969 assert_return(s->type == SOURCE_IO, -EDOM);
1970 assert_return(!event_pid_changed(s->event), -ECHILD);
1971
1972 if (s->io.fd == fd)
1973 return 0;
1974
1975 if (s->enabled == SD_EVENT_OFF) {
1976 s->io.fd = fd;
1977 s->io.registered = false;
1978 } else {
1979 int saved_fd;
1980
1981 saved_fd = s->io.fd;
1982 assert(s->io.registered);
1983
1984 s->io.fd = fd;
1985 s->io.registered = false;
1986
1987 r = source_io_register(s, s->enabled, s->io.events);
1988 if (r < 0) {
1989 s->io.fd = saved_fd;
1990 s->io.registered = true;
1991 return r;
1992 }
1993
1994 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1995 }
1996
1997 return 0;
1998 }
1999
2000 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2001 assert_return(s, -EINVAL);
2002 assert_return(s->type == SOURCE_IO, -EDOM);
2003
2004 return s->io.owned;
2005 }
2006
2007 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2008 assert_return(s, -EINVAL);
2009 assert_return(s->type == SOURCE_IO, -EDOM);
2010
2011 s->io.owned = own;
2012 return 0;
2013 }
2014
2015 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2016 assert_return(s, -EINVAL);
2017 assert_return(events, -EINVAL);
2018 assert_return(s->type == SOURCE_IO, -EDOM);
2019 assert_return(!event_pid_changed(s->event), -ECHILD);
2020
2021 *events = s->io.events;
2022 return 0;
2023 }
2024
2025 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2026 int r;
2027
2028 assert_return(s, -EINVAL);
2029 assert_return(s->type == SOURCE_IO, -EDOM);
2030 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2031 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2032 assert_return(!event_pid_changed(s->event), -ECHILD);
2033
2034 /* edge-triggered updates are never skipped, so we can reset edges */
2035 if (s->io.events == events && !(events & EPOLLET))
2036 return 0;
2037
2038 r = source_set_pending(s, false);
2039 if (r < 0)
2040 return r;
2041
2042 if (s->enabled != SD_EVENT_OFF) {
2043 r = source_io_register(s, s->enabled, events);
2044 if (r < 0)
2045 return r;
2046 }
2047
2048 s->io.events = events;
2049
2050 return 0;
2051 }
2052
2053 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2054 assert_return(s, -EINVAL);
2055 assert_return(revents, -EINVAL);
2056 assert_return(s->type == SOURCE_IO, -EDOM);
2057 assert_return(s->pending, -ENODATA);
2058 assert_return(!event_pid_changed(s->event), -ECHILD);
2059
2060 *revents = s->io.revents;
2061 return 0;
2062 }
2063
2064 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2065 assert_return(s, -EINVAL);
2066 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2067 assert_return(!event_pid_changed(s->event), -ECHILD);
2068
2069 return s->signal.sig;
2070 }
2071
2072 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2073 assert_return(s, -EINVAL);
2074 assert_return(!event_pid_changed(s->event), -ECHILD);
2075
2076 *priority = s->priority;
2077 return 0;
2078 }
2079
2080 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2081 bool rm_inotify = false, rm_inode = false;
2082 struct inotify_data *new_inotify_data = NULL;
2083 struct inode_data *new_inode_data = NULL;
2084 int r;
2085
2086 assert_return(s, -EINVAL);
2087 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2088 assert_return(!event_pid_changed(s->event), -ECHILD);
2089
2090 if (s->priority == priority)
2091 return 0;
2092
2093 if (s->type == SOURCE_INOTIFY) {
2094 struct inode_data *old_inode_data;
2095
2096 assert(s->inotify.inode_data);
2097 old_inode_data = s->inotify.inode_data;
2098
2099 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2100 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2101 * events we allow priority changes only until the first following iteration. */
2102 if (old_inode_data->fd < 0)
2103 return -EOPNOTSUPP;
2104
2105 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2106 if (r < 0)
2107 return r;
2108 rm_inotify = r > 0;
2109
2110 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2111 if (r < 0)
2112 goto fail;
2113 rm_inode = r > 0;
2114
2115 if (new_inode_data->fd < 0) {
2116 /* Duplicate the fd for the new inode object if we don't have any yet */
2117 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2118 if (new_inode_data->fd < 0) {
2119 r = -errno;
2120 goto fail;
2121 }
2122
2123 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2124 }
2125
2126 /* Move the event source to the new inode data structure */
2127 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2128 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2129 s->inotify.inode_data = new_inode_data;
2130
2131 /* Now create the new watch */
2132 r = inode_data_realize_watch(s->event, new_inode_data);
2133 if (r < 0) {
2134 /* Move it back */
2135 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2136 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2137 s->inotify.inode_data = old_inode_data;
2138 goto fail;
2139 }
2140
2141 s->priority = priority;
2142
2143 event_gc_inode_data(s->event, old_inode_data);
2144
2145 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2146 struct signal_data *old, *d;
2147
2148 /* Move us from the signalfd belonging to the old
2149 * priority to the signalfd of the new priority */
2150
2151 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2152
2153 s->priority = priority;
2154
2155 r = event_make_signal_data(s->event, s->signal.sig, &d);
2156 if (r < 0) {
2157 s->priority = old->priority;
2158 return r;
2159 }
2160
2161 event_unmask_signal_data(s->event, old, s->signal.sig);
2162 } else
2163 s->priority = priority;
2164
2165 if (s->pending)
2166 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2167
2168 if (s->prepare)
2169 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2170
2171 if (s->type == SOURCE_EXIT)
2172 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2173
2174 return 0;
2175
2176 fail:
2177 if (rm_inode)
2178 event_free_inode_data(s->event, new_inode_data);
2179
2180 if (rm_inotify)
2181 event_free_inotify_data(s->event, new_inotify_data);
2182
2183 return r;
2184 }
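/* Illustrative sketch (hypothetical source pointer): priorities are plain int64_t values, smaller
   values dispatch first, and the SD_EVENT_PRIORITY_* constants are merely conventional anchors.
   Note that, per the code above, inotify sources only accept priority changes until the next loop
   iteration closes their cached inode fd.

        r = sd_event_source_set_priority(housekeeping_source, SD_EVENT_PRIORITY_IDLE);
        if (r < 0)
                return r;
*/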
2185
2186 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2187 assert_return(s, -EINVAL);
2188 assert_return(!event_pid_changed(s->event), -ECHILD);
2189
2190 if (m)
2191 *m = s->enabled;
2192 return s->enabled != SD_EVENT_OFF;
2193 }
2194
2195 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2196 int r;
2197
2198 assert_return(s, -EINVAL);
2199 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2200 assert_return(!event_pid_changed(s->event), -ECHILD);
2201
2202 /* If we are dead anyway, we are fine with turning off
2203 * sources, but everything else needs to fail. */
2204 if (s->event->state == SD_EVENT_FINISHED)
2205 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2206
2207 if (s->enabled == m)
2208 return 0;
2209
2210 if (m == SD_EVENT_OFF) {
2211
2212 /* Unset the pending flag when this event source is disabled */
2213 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2214 r = source_set_pending(s, false);
2215 if (r < 0)
2216 return r;
2217 }
2218
2219 switch (s->type) {
2220
2221 case SOURCE_IO:
2222 source_io_unregister(s);
2223 s->enabled = m;
2224 break;
2225
2226 case SOURCE_TIME_REALTIME:
2227 case SOURCE_TIME_BOOTTIME:
2228 case SOURCE_TIME_MONOTONIC:
2229 case SOURCE_TIME_REALTIME_ALARM:
2230 case SOURCE_TIME_BOOTTIME_ALARM: {
2231 struct clock_data *d;
2232
2233 s->enabled = m;
2234 d = event_get_clock_data(s->event, s->type);
2235 assert(d);
2236
2237 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2238 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2239 d->needs_rearm = true;
2240 break;
2241 }
2242
2243 case SOURCE_SIGNAL:
2244 s->enabled = m;
2245
2246 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2247 break;
2248
2249 case SOURCE_CHILD:
2250 s->enabled = m;
2251
2252 assert(s->event->n_enabled_child_sources > 0);
2253 s->event->n_enabled_child_sources--;
2254
2255 if (EVENT_SOURCE_WATCH_PIDFD(s))
2256 source_child_pidfd_unregister(s);
2257 else
2258 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2259
2260 break;
2261
2262 case SOURCE_EXIT:
2263 s->enabled = m;
2264 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2265 break;
2266
2267 case SOURCE_DEFER:
2268 case SOURCE_POST:
2269 case SOURCE_INOTIFY:
2270 s->enabled = m;
2271 break;
2272
2273 default:
2274 assert_not_reached("Wut? I shouldn't exist.");
2275 }
2276
2277 } else {
2278
2279 /* Unset the pending flag when this event source is enabled */
2280 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2281 r = source_set_pending(s, false);
2282 if (r < 0)
2283 return r;
2284 }
2285
2286 switch (s->type) {
2287
2288 case SOURCE_IO:
2289 r = source_io_register(s, m, s->io.events);
2290 if (r < 0)
2291 return r;
2292
2293 s->enabled = m;
2294 break;
2295
2296 case SOURCE_TIME_REALTIME:
2297 case SOURCE_TIME_BOOTTIME:
2298 case SOURCE_TIME_MONOTONIC:
2299 case SOURCE_TIME_REALTIME_ALARM:
2300 case SOURCE_TIME_BOOTTIME_ALARM: {
2301 struct clock_data *d;
2302
2303 s->enabled = m;
2304 d = event_get_clock_data(s->event, s->type);
2305 assert(d);
2306
2307 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2308 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2309 d->needs_rearm = true;
2310 break;
2311 }
2312
2313 case SOURCE_SIGNAL:
2314
2315 s->enabled = m;
2316
2317 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2318 if (r < 0) {
2319 s->enabled = SD_EVENT_OFF;
2320 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2321 return r;
2322 }
2323
2324 break;
2325
2326 case SOURCE_CHILD:
2327
2328 if (s->enabled == SD_EVENT_OFF)
2329 s->event->n_enabled_child_sources++;
2330
2331 s->enabled = m;
2332
2333 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2334 /* yes, we have a pidfd */
2335
2336 r = source_child_pidfd_register(s, s->enabled);
2337 if (r < 0) {
2338 s->enabled = SD_EVENT_OFF;
2339 s->event->n_enabled_child_sources--;
2340 return r;
2341 }
2342 } else {
2343 /* no pidfd, or we are watching for something other than WEXITED */
2344
2345 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2346 if (r < 0) {
2347 s->enabled = SD_EVENT_OFF;
2348 s->event->n_enabled_child_sources--;
2349 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2350 return r;
2351 }
2352 }
2353
2354 break;
2355
2356 case SOURCE_EXIT:
2357 s->enabled = m;
2358 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2359 break;
2360
2361 case SOURCE_DEFER:
2362 case SOURCE_POST:
2363 case SOURCE_INOTIFY:
2364 s->enabled = m;
2365 break;
2366
2367 default:
2368 assert_not_reached("Wut? I shouldn't exist.");
2369 }
2370 }
2371
2372 if (s->pending)
2373 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2374
2375 if (s->prepare)
2376 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2377
2378 return 0;
2379 }
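/* Illustrative caller-side sketch (hypothetical io_source from sd_event_add_io()): disabling and
   re-enabling a source is the intended way to pause delivery temporarily. For an IO source,
   SD_EVENT_OFF unregisters the fd from epoll while leaving it open; SD_EVENT_ON re-registers it
   with the stored event mask; SD_EVENT_ONESHOT arms it for exactly one dispatch after which the
   loop switches it off again.

        r = sd_event_source_set_enabled(io_source, SD_EVENT_OFF);    // pause delivery
        if (r < 0)
                return r;

        r = sd_event_source_set_enabled(io_source, SD_EVENT_ON);     // resume delivery
        if (r < 0)
                return r;
*/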
2380
2381 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2382 assert_return(s, -EINVAL);
2383 assert_return(usec, -EINVAL);
2384 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2385 assert_return(!event_pid_changed(s->event), -ECHILD);
2386
2387 *usec = s->time.next;
2388 return 0;
2389 }
2390
2391 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2392 struct clock_data *d;
2393 int r;
2394
2395 assert_return(s, -EINVAL);
2396 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2397 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2398 assert_return(!event_pid_changed(s->event), -ECHILD);
2399
2400 r = source_set_pending(s, false);
2401 if (r < 0)
2402 return r;
2403
2404 s->time.next = usec;
2405
2406 d = event_get_clock_data(s->event, s->type);
2407 assert(d);
2408
2409 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2410 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2411 d->needs_rearm = true;
2412
2413 return 0;
2414 }
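/* Illustrative sketch: a periodic timer is typically built by re-arming the source from its own
   callback, since time sources go back to "off" once dispatched. The handler name is hypothetical;
   the signature matches sd_event_time_handler_t.

        static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
                int r;

                // Schedule the next tick relative to the deadline that just fired, not to "now",
                // so that accumulated dispatch latency does not make the period drift.
                r = sd_event_source_set_time(s, usec + USEC_PER_SEC);
                if (r < 0)
                        return r;

                return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
        }
*/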
2415
2416 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2417 assert_return(s, -EINVAL);
2418 assert_return(usec, -EINVAL);
2419 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2420 assert_return(!event_pid_changed(s->event), -ECHILD);
2421
2422 *usec = s->time.accuracy;
2423 return 0;
2424 }
2425
2426 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2427 struct clock_data *d;
2428 int r;
2429
2430 assert_return(s, -EINVAL);
2431 assert_return(usec != (uint64_t) -1, -EINVAL);
2432 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2433 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2434 assert_return(!event_pid_changed(s->event), -ECHILD);
2435
2436 r = source_set_pending(s, false);
2437 if (r < 0)
2438 return r;
2439
2440 if (usec == 0)
2441 usec = DEFAULT_ACCURACY_USEC;
2442
2443 s->time.accuracy = usec;
2444
2445 d = event_get_clock_data(s->event, s->type);
2446 assert(d);
2447
2448 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2449 d->needs_rearm = true;
2450
2451 return 0;
2452 }
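/* Illustrative sketch: a generous accuracy gives sleep_between() (below) more room to coalesce
   wake-ups with other timers on the system; passing 0 reverts to the 250ms default, and
   (uint64_t) -1 is rejected.

        r = sd_event_source_set_time_accuracy(timer_source, 5 * USEC_PER_SEC);   // firing within 5s after the deadline is fine
        if (r < 0)
                return r;
*/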
2453
2454 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2455 assert_return(s, -EINVAL);
2456 assert_return(clock, -EINVAL);
2457 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2458 assert_return(!event_pid_changed(s->event), -ECHILD);
2459
2460 *clock = event_source_type_to_clock(s->type);
2461 return 0;
2462 }
2463
2464 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2465 assert_return(s, -EINVAL);
2466 assert_return(pid, -EINVAL);
2467 assert_return(s->type == SOURCE_CHILD, -EDOM);
2468 assert_return(!event_pid_changed(s->event), -ECHILD);
2469
2470 *pid = s->child.pid;
2471 return 0;
2472 }
2473
2474 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2475 assert_return(s, -EINVAL);
2476 assert_return(s->type == SOURCE_CHILD, -EDOM);
2477 assert_return(!event_pid_changed(s->event), -ECHILD);
2478
2479 if (s->child.pidfd < 0)
2480 return -EOPNOTSUPP;
2481
2482 return s->child.pidfd;
2483 }
2484
2485 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2486 assert_return(s, -EINVAL);
2487 assert_return(s->type == SOURCE_CHILD, -EDOM);
2488 assert_return(!event_pid_changed(s->event), -ECHILD);
2489 assert_return(SIGNAL_VALID(sig), -EINVAL);
2490
2491 /* If we have already seen an indication that the process exited, refuse to send a signal early. This
2492 * way we can be sure we don't accidentally kill the wrong process due to PID reuse when pidfds are not
2493 * available. */
2494 if (s->child.exited)
2495 return -ESRCH;
2496
2497 if (s->child.pidfd >= 0) {
2498 siginfo_t copy;
2499
2500 /* pidfd_send_signal() modifies the siginfo_t argument. This is weird, hence let's copy the
2501 * structure here */
2502 if (si)
2503 copy = *si;
2504
2505 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2506 /* Only fall back to the legacy path below if the system call is not implemented or prohibited; propagate any other error */
2507 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2508 return -errno;
2509 } else
2510 return 0;
2511 }
2512
2513 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2514 * this here. */
2515 if (flags != 0)
2516 return -EOPNOTSUPP;
2517
2518 if (si) {
2519 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2520 siginfo_t copy = *si;
2521
2522 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2523 return -errno;
2524 } else if (kill(s->child.pid, sig) < 0)
2525 return -errno;
2526
2527 return 0;
2528 }
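/* Illustrative sketch (hypothetical child_source from sd_event_add_child()): asking the watched
   child to terminate through the event source is safer than a raw kill(), because the call uses
   the pidfd when one is available and refuses to signal once an exit has already been observed,
   avoiding PID-reuse accidents.

        r = sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
        if (r < 0 && r != -ESRCH)   // -ESRCH: the child already exited, nothing left to signal
                return r;
*/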
2529
2530 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2531 assert_return(s, -EINVAL);
2532 assert_return(s->type == SOURCE_CHILD, -EDOM);
2533
2534 if (s->child.pidfd < 0)
2535 return -EOPNOTSUPP;
2536
2537 return s->child.pidfd_owned;
2538 }
2539
2540 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2541 assert_return(s, -EINVAL);
2542 assert_return(s->type == SOURCE_CHILD, -EDOM);
2543
2544 if (s->child.pidfd < 0)
2545 return -EOPNOTSUPP;
2546
2547 s->child.pidfd_owned = own;
2548 return 0;
2549 }
2550
2551 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2552 assert_return(s, -EINVAL);
2553 assert_return(s->type == SOURCE_CHILD, -EDOM);
2554
2555 return s->child.process_owned;
2556 }
2557
2558 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2559 assert_return(s, -EINVAL);
2560 assert_return(s->type == SOURCE_CHILD, -EDOM);
2561
2562 s->child.process_owned = own;
2563 return 0;
2564 }
2565
2566 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2567 assert_return(s, -EINVAL);
2568 assert_return(mask, -EINVAL);
2569 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2570 assert_return(!event_pid_changed(s->event), -ECHILD);
2571
2572 *mask = s->inotify.mask;
2573 return 0;
2574 }
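/* Illustrative sketch (path, handler and context are hypothetical): inotify sources are created
   per path, but internally all sources of equal priority share one inotify fd, and sources
   watching the same inode share one watch whose mask is the union of their requests. The event
   passed to the callback is therefore the raw kernel event and its mask may carry bits some other
   source asked for.

        static int on_config_changed(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
                if (ev->mask & (IN_MODIFY|IN_MOVED_TO))
                        return reload_config(userdata);   // hypothetical reload helper
                return 0;
        }

        r = sd_event_add_inotify(e, NULL, "/etc/example.conf", IN_MODIFY|IN_MOVED_TO, on_config_changed, ctx);
        if (r < 0)
                return r;
*/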
2575
2576 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2577 int r;
2578
2579 assert_return(s, -EINVAL);
2580 assert_return(s->type != SOURCE_EXIT, -EDOM);
2581 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2582 assert_return(!event_pid_changed(s->event), -ECHILD);
2583
2584 if (s->prepare == callback)
2585 return 0;
2586
2587 if (callback && s->prepare) {
2588 s->prepare = callback;
2589 return 0;
2590 }
2591
2592 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2593 if (r < 0)
2594 return r;
2595
2596 s->prepare = callback;
2597
2598 if (callback) {
2599 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2600 if (r < 0)
2601 return r;
2602 } else
2603 prioq_remove(s->event->prepare, s, &s->prepare_index);
2604
2605 return 0;
2606 }
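/* Illustrative sketch: a prepare callback runs right before the loop polls, which makes it a good
   place to adjust a source from external state. The queue type and helper are hypothetical; the
   callback signature matches sd_event_handler_t.

        static int prepare_flush(sd_event_source *s, void *userdata) {
                OutgoingQueue *q = userdata;   // hypothetical type

                // Only ask for EPOLLOUT while there is actually something to write out.
                return sd_event_source_set_io_events(s, queue_is_empty(q) ? EPOLLIN : (EPOLLIN|EPOLLOUT));
        }

        r = sd_event_source_set_prepare(io_source, prepare_flush);
        if (r < 0)
                return r;
*/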
2607
2608 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2609 assert_return(s, NULL);
2610
2611 return s->userdata;
2612 }
2613
2614 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2615 void *ret;
2616
2617 assert_return(s, NULL);
2618
2619 ret = s->userdata;
2620 s->userdata = userdata;
2621
2622 return ret;
2623 }
2624
2625 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2626 usec_t c;
2627 assert(e);
2628 assert(a <= b);
2629
2630 if (a <= 0)
2631 return 0;
2632 if (a >= USEC_INFINITY)
2633 return USEC_INFINITY;
2634
2635 if (b <= a + 1)
2636 return a;
2637
2638 initialize_perturb(e);
2639
2640 /*
2641 Find a good time to wake up again between times a and b. We
2642 have two goals here:
2643
2644 a) We want to wake up as seldom as possible, hence prefer
2645 later times over earlier times.
2646
2647 b) But if we have to wake up, then let's make sure to
2648 dispatch as much as possible on the entire system.
2649
2650 We implement this by waking up everywhere at the same time
2651 within any given minute if we can, synchronised via the
2652 perturbation value determined from the boot ID. If we can't,
2653 then we try to find the same spot in every 10s, then 1s, and
2654 finally in 250ms steps. Otherwise, we pick the last possible time
2655 to wake up.
2656 */
2657
2658 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2659 if (c >= b) {
2660 if (_unlikely_(c < USEC_PER_MINUTE))
2661 return b;
2662
2663 c -= USEC_PER_MINUTE;
2664 }
2665
2666 if (c >= a)
2667 return c;
2668
2669 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2670 if (c >= b) {
2671 if (_unlikely_(c < USEC_PER_SEC*10))
2672 return b;
2673
2674 c -= USEC_PER_SEC*10;
2675 }
2676
2677 if (c >= a)
2678 return c;
2679
2680 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2681 if (c >= b) {
2682 if (_unlikely_(c < USEC_PER_SEC))
2683 return b;
2684
2685 c -= USEC_PER_SEC;
2686 }
2687
2688 if (c >= a)
2689 return c;
2690
2691 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2692 if (c >= b) {
2693 if (_unlikely_(c < USEC_PER_MSEC*250))
2694 return b;
2695
2696 c -= USEC_PER_MSEC*250;
2697 }
2698
2699 if (c >= a)
2700 return c;
2701
2702 return b;
2703 }
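/* Worked example of the coalescing above (illustrative numbers, assuming a perturbation of 17.3s):
 * for a = 12:00:05 and b = 12:00:41 the minute-aligned candidate is 12:00:17.3, which lies within
 * [a, b] and is returned. For b = 12:00:14 that candidate is past b, and stepping back one minute
 * lands before a, so the 10s-aligned candidate is tried next: 12:00:10 + (17.3 mod 10) = 12:00:17.3,
 * again past b, minus 10s gives 12:00:07.3, which is within [a, b] and is returned. */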
2704
2705 static int event_arm_timer(
2706 sd_event *e,
2707 struct clock_data *d) {
2708
2709 struct itimerspec its = {};
2710 sd_event_source *a, *b;
2711 usec_t t;
2712 int r;
2713
2714 assert(e);
2715 assert(d);
2716
2717 if (!d->needs_rearm)
2718 return 0;
2719 else
2720 d->needs_rearm = false;
2721
2722 a = prioq_peek(d->earliest);
2723 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2724
2725 if (d->fd < 0)
2726 return 0;
2727
2728 if (d->next == USEC_INFINITY)
2729 return 0;
2730
2731 /* disarm */
2732 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2733 if (r < 0)
2734 return -errno;
2735
2736 d->next = USEC_INFINITY;
2737 return 0;
2738 }
2739
2740 b = prioq_peek(d->latest);
2741 assert_se(b && b->enabled != SD_EVENT_OFF);
2742
2743 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2744 if (d->next == t)
2745 return 0;
2746
2747 assert_se(d->fd >= 0);
2748
2749 if (t == 0) {
2750 /* We don't want to disarm here, we just mean some time looooong ago. */
2751 its.it_value.tv_sec = 0;
2752 its.it_value.tv_nsec = 1;
2753 } else
2754 timespec_store(&its.it_value, t);
2755
2756 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2757 if (r < 0)
2758 return -errno;
2759
2760 d->next = t;
2761 return 0;
2762 }
2763
2764 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2765 assert(e);
2766 assert(s);
2767 assert(s->type == SOURCE_IO);
2768
2769 /* If the event source was already pending, we just OR in the
2770 * new revents, otherwise we reset the value. The ORing is
2771 * necessary to handle EPOLLONESHOT events properly where
2772 * readability might happen independently of writability, and
2773 * we need to keep track of both */
2774
2775 if (s->pending)
2776 s->io.revents |= revents;
2777 else
2778 s->io.revents = revents;
2779
2780 return source_set_pending(s, true);
2781 }
2782
2783 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2784 uint64_t x;
2785 ssize_t ss;
2786
2787 assert(e);
2788 assert(fd >= 0);
2789
2790 assert_return(events == EPOLLIN, -EIO);
2791
2792 ss = read(fd, &x, sizeof(x));
2793 if (ss < 0) {
2794 if (IN_SET(errno, EAGAIN, EINTR))
2795 return 0;
2796
2797 return -errno;
2798 }
2799
2800 if (_unlikely_(ss != sizeof(x)))
2801 return -EIO;
2802
2803 if (next)
2804 *next = USEC_INFINITY;
2805
2806 return 0;
2807 }
2808
2809 static int process_timer(
2810 sd_event *e,
2811 usec_t n,
2812 struct clock_data *d) {
2813
2814 sd_event_source *s;
2815 int r;
2816
2817 assert(e);
2818 assert(d);
2819
2820 for (;;) {
2821 s = prioq_peek(d->earliest);
2822 if (!s ||
2823 s->time.next > n ||
2824 s->enabled == SD_EVENT_OFF ||
2825 s->pending)
2826 break;
2827
2828 r = source_set_pending(s, true);
2829 if (r < 0)
2830 return r;
2831
2832 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2833 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2834 d->needs_rearm = true;
2835 }
2836
2837 return 0;
2838 }
2839
2840 static int process_child(sd_event *e) {
2841 sd_event_source *s;
2842 Iterator i;
2843 int r;
2844
2845 assert(e);
2846
2847 e->need_process_child = false;
2848
2849 /*
2850 So, this is ugly. We iteratively invoke waitid() with P_PID
2851 + WNOHANG for each PID we wait for, instead of using
2852 P_ALL. This is because we only want to get child
2853 information of very specific child processes, and not all
2854 of them. We might not have processed the SIGCHLD event of a
2855 previous invocation and we don't want to maintain an
2856 unbounded *per-child* event queue, hence we really don't
2857 want anything flushed out of the kernel's queue that we
2858 don't care about. Since this is O(n) this means that if you
2859 have a lot of processes you probably want to handle SIGCHLD
2860 yourself.
2861
2862 We do not reap the children here (by using WNOWAIT), this
2863 is only done after the event source is dispatched so that
2864 the callback still sees the process as a zombie.
2865 */
2866
2867 HASHMAP_FOREACH(s, e->child_sources, i) {
2868 assert(s->type == SOURCE_CHILD);
2869
2870 if (s->pending)
2871 continue;
2872
2873 if (s->enabled == SD_EVENT_OFF)
2874 continue;
2875
2876 if (s->child.exited)
2877 continue;
2878
2879 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? Then don't waitid() for it here. */
2880 continue;
2881
2882 zero(s->child.siginfo);
2883 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2884 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2885 if (r < 0)
2886 return -errno;
2887
2888 if (s->child.siginfo.si_pid != 0) {
2889 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2890
2891 if (zombie)
2892 s->child.exited = true;
2893
2894 if (!zombie && (s->child.options & WEXITED)) {
2895 /* If the child isn't dead then let's
2896 * immediately remove the state change
2897 * from the queue, since there's no
2898 * benefit in leaving it queued */
2899
2900 assert(s->child.options & (WSTOPPED|WCONTINUED));
2901 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2902 }
2903
2904 r = source_set_pending(s, true);
2905 if (r < 0)
2906 return r;
2907 }
2908 }
2909
2910 return 0;
2911 }
2912
2913 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2914 assert(e);
2915 assert(s);
2916 assert(s->type == SOURCE_CHILD);
2917
2918 if (s->pending)
2919 return 0;
2920
2921 if (s->enabled == SD_EVENT_OFF)
2922 return 0;
2923
2924 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2925 return 0;
2926
2927 zero(s->child.siginfo);
2928 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2929 return -errno;
2930
2931 if (s->child.siginfo.si_pid == 0)
2932 return 0;
2933
2934 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2935 s->child.exited = true;
2936
2937 return source_set_pending(s, true);
2938 }
2939
2940 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2941 bool read_one = false;
2942 int r;
2943
2944 assert(e);
2945 assert(d);
2946 assert_return(events == EPOLLIN, -EIO);
2947
2948 /* If there's a signal queued on this priority and SIGCHLD is
2949 on this priority too, then make sure to recheck the
2950 children we watch. This is because we only ever dequeue
2951 the first signal per priority, and if we dequeue one and
2952 SIGCHLD is enqueued later we wouldn't know about it, but we
2953 might have higher priority children we care about, hence we
2954 need to check that explicitly. */
2955
2956 if (sigismember(&d->sigset, SIGCHLD))
2957 e->need_process_child = true;
2958
2959 /* If there's already an event source pending for this
2960 * priority we don't read another */
2961 if (d->current)
2962 return 0;
2963
2964 for (;;) {
2965 struct signalfd_siginfo si;
2966 ssize_t n;
2967 sd_event_source *s = NULL;
2968
2969 n = read(d->fd, &si, sizeof(si));
2970 if (n < 0) {
2971 if (IN_SET(errno, EAGAIN, EINTR))
2972 return read_one;
2973
2974 return -errno;
2975 }
2976
2977 if (_unlikely_(n != sizeof(si)))
2978 return -EIO;
2979
2980 assert(SIGNAL_VALID(si.ssi_signo));
2981
2982 read_one = true;
2983
2984 if (e->signal_sources)
2985 s = e->signal_sources[si.ssi_signo];
2986 if (!s)
2987 continue;
2988 if (s->pending)
2989 continue;
2990
2991 s->signal.siginfo = si;
2992 d->current = s;
2993
2994 r = source_set_pending(s, true);
2995 if (r < 0)
2996 return r;
2997
2998 return 1;
2999 }
3000 }
3001
3002 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3003 ssize_t n;
3004
3005 assert(e);
3006 assert(d);
3007
3008 assert_return(revents == EPOLLIN, -EIO);
3009
3010 /* If there's already an event source pending for this priority, don't read another */
3011 if (d->n_pending > 0)
3012 return 0;
3013
3014 /* Is the read buffer non-empty? If so, let's not read more */
3015 if (d->buffer_filled > 0)
3016 return 0;
3017
3018 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3019 if (n < 0) {
3020 if (IN_SET(errno, EAGAIN, EINTR))
3021 return 0;
3022
3023 return -errno;
3024 }
3025
3026 assert(n > 0);
3027 d->buffer_filled = (size_t) n;
3028 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3029
3030 return 1;
3031 }
3032
3033 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3034 assert(e);
3035 assert(d);
3036 assert(sz <= d->buffer_filled);
3037
3038 if (sz == 0)
3039 return;
3040
3041 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3042 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3043 d->buffer_filled -= sz;
3044
3045 if (d->buffer_filled == 0)
3046 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3047 }
3048
3049 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3050 int r;
3051
3052 assert(e);
3053 assert(d);
3054
3055 /* If there's already an event source pending for this priority, don't read another */
3056 if (d->n_pending > 0)
3057 return 0;
3058
3059 while (d->buffer_filled > 0) {
3060 size_t sz;
3061
3062 /* Let's validate that the event structures are complete */
3063 if (d->buffer_filled < offsetof(struct inotify_event, name))
3064 return -EIO;
3065
3066 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3067 if (d->buffer_filled < sz)
3068 return -EIO;
3069
3070 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3071 struct inode_data *inode_data;
3072 Iterator i;
3073
3074 /* The queue overran, let's pass this event to all event sources connected to this inotify
3075 * object */
3076
3077 HASHMAP_FOREACH(inode_data, d->inodes, i) {
3078 sd_event_source *s;
3079
3080 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3081
3082 if (s->enabled == SD_EVENT_OFF)
3083 continue;
3084
3085 r = source_set_pending(s, true);
3086 if (r < 0)
3087 return r;
3088 }
3089 }
3090 } else {
3091 struct inode_data *inode_data;
3092 sd_event_source *s;
3093
3094 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3095 * our watch descriptor table. */
3096 if (d->buffer.ev.mask & IN_IGNORED) {
3097
3098 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3099 if (!inode_data) {
3100 event_inotify_data_drop(e, d, sz);
3101 continue;
3102 }
3103
3104 /* The watch descriptor was removed by the kernel, let's drop it here too */
3105 inode_data->wd = -1;
3106 } else {
3107 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3108 if (!inode_data) {
3109 event_inotify_data_drop(e, d, sz);
3110 continue;
3111 }
3112 }
3113
3114 /* Trigger all event sources that are interested in these events. Also trigger all event
3115 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3116 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3117
3118 if (s->enabled == SD_EVENT_OFF)
3119 continue;
3120
3121 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3122 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3123 continue;
3124
3125 r = source_set_pending(s, true);
3126 if (r < 0)
3127 return r;
3128 }
3129 }
3130
3131 /* Something pending now? If so, let's finish, otherwise let's read more. */
3132 if (d->n_pending > 0)
3133 return 1;
3134 }
3135
3136 return 0;
3137 }
3138
3139 static int process_inotify(sd_event *e) {
3140 struct inotify_data *d;
3141 int r, done = 0;
3142
3143 assert(e);
3144
3145 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3146 r = event_inotify_data_process(e, d);
3147 if (r < 0)
3148 return r;
3149 if (r > 0)
3150 done++;
3151 }
3152
3153 return done;
3154 }
3155
3156 static int source_dispatch(sd_event_source *s) {
3157 EventSourceType saved_type;
3158 int r = 0;
3159
3160 assert(s);
3161 assert(s->pending || s->type == SOURCE_EXIT);
3162
3163 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3164 * the event. */
3165 saved_type = s->type;
3166
3167 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3168 r = source_set_pending(s, false);
3169 if (r < 0)
3170 return r;
3171 }
3172
3173 if (s->type != SOURCE_POST) {
3174 sd_event_source *z;
3175 Iterator i;
3176
3177 /* If we execute a non-post source, let's mark all
3178 * post sources as pending */
3179
3180 SET_FOREACH(z, s->event->post_sources, i) {
3181 if (z->enabled == SD_EVENT_OFF)
3182 continue;
3183
3184 r = source_set_pending(z, true);
3185 if (r < 0)
3186 return r;
3187 }
3188 }
3189
3190 if (s->enabled == SD_EVENT_ONESHOT) {
3191 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3192 if (r < 0)
3193 return r;
3194 }
3195
3196 s->dispatching = true;
3197
3198 switch (s->type) {
3199
3200 case SOURCE_IO:
3201 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3202 break;
3203
3204 case SOURCE_TIME_REALTIME:
3205 case SOURCE_TIME_BOOTTIME:
3206 case SOURCE_TIME_MONOTONIC:
3207 case SOURCE_TIME_REALTIME_ALARM:
3208 case SOURCE_TIME_BOOTTIME_ALARM:
3209 r = s->time.callback(s, s->time.next, s->userdata);
3210 break;
3211
3212 case SOURCE_SIGNAL:
3213 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3214 break;
3215
3216 case SOURCE_CHILD: {
3217 bool zombie;
3218
3219 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3220
3221 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3222
3223 /* Now, reap the PID for good. */
3224 if (zombie) {
3225 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3226 s->child.waited = true;
3227 }
3228
3229 break;
3230 }
3231
3232 case SOURCE_DEFER:
3233 r = s->defer.callback(s, s->userdata);
3234 break;
3235
3236 case SOURCE_POST:
3237 r = s->post.callback(s, s->userdata);
3238 break;
3239
3240 case SOURCE_EXIT:
3241 r = s->exit.callback(s, s->userdata);
3242 break;
3243
3244 case SOURCE_INOTIFY: {
3245 struct sd_event *e = s->event;
3246 struct inotify_data *d;
3247 size_t sz;
3248
3249 assert(s->inotify.inode_data);
3250 assert_se(d = s->inotify.inode_data->inotify_data);
3251
3252 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3253 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3254 assert(d->buffer_filled >= sz);
3255
3256 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3257
3258 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3259 * buffer. */
3260 if (d->n_pending == 0)
3261 event_inotify_data_drop(e, d, sz);
3262
3263 break;
3264 }
3265
3266 case SOURCE_WATCHDOG:
3267 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3268 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3269 assert_not_reached("Wut? I shouldn't exist.");
3270 }
3271
3272 s->dispatching = false;
3273
3274 if (r < 0)
3275 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3276 strna(s->description), event_source_type_to_string(saved_type));
3277
3278 if (s->n_ref == 0)
3279 source_free(s);
3280 else if (r < 0)
3281 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3282
3283 return 1;
3284 }
3285
3286 static int event_prepare(sd_event *e) {
3287 int r;
3288
3289 assert(e);
3290
3291 for (;;) {
3292 sd_event_source *s;
3293
3294 s = prioq_peek(e->prepare);
3295 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3296 break;
3297
3298 s->prepare_iteration = e->iteration;
3299 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3300 if (r < 0)
3301 return r;
3302
3303 assert(s->prepare);
3304
3305 s->dispatching = true;
3306 r = s->prepare(s, s->userdata);
3307 s->dispatching = false;
3308
3309 if (r < 0)
3310 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3311 strna(s->description), event_source_type_to_string(s->type));
3312
3313 if (s->n_ref == 0)
3314 source_free(s);
3315 else if (r < 0)
3316 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3317 }
3318
3319 return 0;
3320 }
3321
3322 static int dispatch_exit(sd_event *e) {
3323 sd_event_source *p;
3324 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3325 int r;
3326
3327 assert(e);
3328
3329 p = prioq_peek(e->exit);
3330 if (!p || p->enabled == SD_EVENT_OFF) {
3331 e->state = SD_EVENT_FINISHED;
3332 return 0;
3333 }
3334
3335 ref = sd_event_ref(e);
3336 e->iteration++;
3337 e->state = SD_EVENT_EXITING;
3338 r = source_dispatch(p);
3339 e->state = SD_EVENT_INITIAL;
3340 return r;
3341 }
3342
3343 static sd_event_source* event_next_pending(sd_event *e) {
3344 sd_event_source *p;
3345
3346 assert(e);
3347
3348 p = prioq_peek(e->pending);
3349 if (!p)
3350 return NULL;
3351
3352 if (p->enabled == SD_EVENT_OFF)
3353 return NULL;
3354
3355 return p;
3356 }
3357
3358 static int arm_watchdog(sd_event *e) {
3359 struct itimerspec its = {};
3360 usec_t t;
3361 int r;
3362
3363 assert(e);
3364 assert(e->watchdog_fd >= 0);
3365
3366 t = sleep_between(e,
3367 e->watchdog_last + (e->watchdog_period / 2),
3368 e->watchdog_last + (e->watchdog_period * 3 / 4));
3369
3370 timespec_store(&its.it_value, t);
3371
3372 /* Make sure we never set the watchdog to 0, which tells the
3373 * kernel to disable it. */
3374 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3375 its.it_value.tv_nsec = 1;
3376
3377 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3378 if (r < 0)
3379 return -errno;
3380
3381 return 0;
3382 }
3383
3384 static int process_watchdog(sd_event *e) {
3385 assert(e);
3386
3387 if (!e->watchdog)
3388 return 0;
3389
3390 /* Don't notify watchdog too often */
3391 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3392 return 0;
3393
3394 sd_notify(false, "WATCHDOG=1");
3395 e->watchdog_last = e->timestamp.monotonic;
3396
3397 return arm_watchdog(e);
3398 }
3399
3400 static void event_close_inode_data_fds(sd_event *e) {
3401 struct inode_data *d;
3402
3403 assert(e);
3404
3405 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3406 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3407 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3408 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3409 * compromise. */
3410
3411 while ((d = e->inode_data_to_close)) {
3412 assert(d->fd >= 0);
3413 d->fd = safe_close(d->fd);
3414
3415 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3416 }
3417 }
3418
3419 _public_ int sd_event_prepare(sd_event *e) {
3420 int r;
3421
3422 assert_return(e, -EINVAL);
3423 assert_return(e = event_resolve(e), -ENOPKG);
3424 assert_return(!event_pid_changed(e), -ECHILD);
3425 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3426 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3427
3428 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3429 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3430 * syscalls */
3431 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3432
3433 if (e->exit_requested)
3434 goto pending;
3435
3436 e->iteration++;
3437
3438 e->state = SD_EVENT_PREPARING;
3439 r = event_prepare(e);
3440 e->state = SD_EVENT_INITIAL;
3441 if (r < 0)
3442 return r;
3443
3444 r = event_arm_timer(e, &e->realtime);
3445 if (r < 0)
3446 return r;
3447
3448 r = event_arm_timer(e, &e->boottime);
3449 if (r < 0)
3450 return r;
3451
3452 r = event_arm_timer(e, &e->monotonic);
3453 if (r < 0)
3454 return r;
3455
3456 r = event_arm_timer(e, &e->realtime_alarm);
3457 if (r < 0)
3458 return r;
3459
3460 r = event_arm_timer(e, &e->boottime_alarm);
3461 if (r < 0)
3462 return r;
3463
3464 event_close_inode_data_fds(e);
3465
3466 if (event_next_pending(e) || e->need_process_child)
3467 goto pending;
3468
3469 e->state = SD_EVENT_ARMED;
3470
3471 return 0;
3472
3473 pending:
3474 e->state = SD_EVENT_ARMED;
3475 r = sd_event_wait(e, 0);
3476 if (r == 0)
3477 e->state = SD_EVENT_ARMED;
3478
3479 return r;
3480 }
3481
3482 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3483 size_t event_queue_max;
3484 int r, m, i;
3485
3486 assert_return(e, -EINVAL);
3487 assert_return(e = event_resolve(e), -ENOPKG);
3488 assert_return(!event_pid_changed(e), -ECHILD);
3489 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3490 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3491
3492 if (e->exit_requested) {
3493 e->state = SD_EVENT_PENDING;
3494 return 1;
3495 }
3496
3497 event_queue_max = MAX(e->n_sources, 1u);
3498 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3499 return -ENOMEM;
3500
3501 /* If we still have inotify data buffered, then query the other fds, but don't block waiting for them */
3502 if (e->inotify_data_buffered)
3503 timeout = 0;
3504
3505 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
3506 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3507 if (m < 0) {
3508 if (errno == EINTR) {
3509 e->state = SD_EVENT_PENDING;
3510 return 1;
3511 }
3512
3513 r = -errno;
3514 goto finish;
3515 }
3516
3517 triple_timestamp_get(&e->timestamp);
3518
3519 for (i = 0; i < m; i++) {
3520
3521 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3522 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3523 else {
3524 WakeupType *t = e->event_queue[i].data.ptr;
3525
3526 switch (*t) {
3527
3528 case WAKEUP_EVENT_SOURCE: {
3529 sd_event_source *s = e->event_queue[i].data.ptr;
3530
3531 assert(s);
3532
3533 switch (s->type) {
3534
3535 case SOURCE_IO:
3536 r = process_io(e, s, e->event_queue[i].events);
3537 break;
3538
3539 case SOURCE_CHILD:
3540 r = process_pidfd(e, s, e->event_queue[i].events);
3541 break;
3542
3543 default:
3544 assert_not_reached("Unexpected event source type");
3545 }
3546
3547 break;
3548 }
3549
3550 case WAKEUP_CLOCK_DATA: {
3551 struct clock_data *d = e->event_queue[i].data.ptr;
3552
3553 assert(d);
3554
3555 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3556 break;
3557 }
3558
3559 case WAKEUP_SIGNAL_DATA:
3560 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3561 break;
3562
3563 case WAKEUP_INOTIFY_DATA:
3564 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3565 break;
3566
3567 default:
3568 assert_not_reached("Invalid wake-up pointer");
3569 }
3570 }
3571 if (r < 0)
3572 goto finish;
3573 }
3574
3575 r = process_watchdog(e);
3576 if (r < 0)
3577 goto finish;
3578
3579 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3580 if (r < 0)
3581 goto finish;
3582
3583 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3584 if (r < 0)
3585 goto finish;
3586
3587 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3588 if (r < 0)
3589 goto finish;
3590
3591 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3592 if (r < 0)
3593 goto finish;
3594
3595 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3596 if (r < 0)
3597 goto finish;
3598
3599 if (e->need_process_child) {
3600 r = process_child(e);
3601 if (r < 0)
3602 goto finish;
3603 }
3604
3605 r = process_inotify(e);
3606 if (r < 0)
3607 goto finish;
3608
3609 if (event_next_pending(e)) {
3610 e->state = SD_EVENT_PENDING;
3611
3612 return 1;
3613 }
3614
3615 r = 0;
3616
3617 finish:
3618 e->state = SD_EVENT_INITIAL;
3619
3620 return r;
3621 }
3622
3623 _public_ int sd_event_dispatch(sd_event *e) {
3624 sd_event_source *p;
3625 int r;
3626
3627 assert_return(e, -EINVAL);
3628 assert_return(e = event_resolve(e), -ENOPKG);
3629 assert_return(!event_pid_changed(e), -ECHILD);
3630 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3631 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3632
3633 if (e->exit_requested)
3634 return dispatch_exit(e);
3635
3636 p = event_next_pending(e);
3637 if (p) {
3638 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3639
3640 ref = sd_event_ref(e);
3641 e->state = SD_EVENT_RUNNING;
3642 r = source_dispatch(p);
3643 e->state = SD_EVENT_INITIAL;
3644 return r;
3645 }
3646
3647 e->state = SD_EVENT_INITIAL;
3648
3649 return 1;
3650 }
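/* Illustrative sketch: the prepare/wait/dispatch split exists so the loop can be driven from a
   foreign poll loop (via sd_event_get_fd(), further below). One manual iteration, roughly
   equivalent to a single sd_event_run() pass, might look like this; the function name is
   hypothetical.

        static int run_one_iteration(sd_event *e, uint64_t timeout_usec) {
                int r;

                r = sd_event_prepare(e);                // > 0: something is already pending
                if (r == 0)
                        r = sd_event_wait(e, timeout_usec);
                if (r > 0)
                        r = sd_event_dispatch(e);

                return r;
        }
*/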
3651
3652 static void event_log_delays(sd_event *e) {
3653 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3654 size_t l, i;
3655
3656 p = b;
3657 l = sizeof(b);
3658 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3659 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3660 e->delays[i] = 0;
3661 }
3662 log_debug("Event loop iterations: %s", b);
3663 }
3664
3665 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3666 int r;
3667
3668 assert_return(e, -EINVAL);
3669 assert_return(e = event_resolve(e), -ENOPKG);
3670 assert_return(!event_pid_changed(e), -ECHILD);
3671 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3672 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3673
3674 if (e->profile_delays && e->last_run) {
3675 usec_t this_run;
3676 unsigned l;
3677
3678 this_run = now(CLOCK_MONOTONIC);
3679
3680 l = u64log2(this_run - e->last_run);
3681 assert(l < sizeof(e->delays));
3682 e->delays[l]++;
3683
3684 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3685 event_log_delays(e);
3686 e->last_log = this_run;
3687 }
3688 }
3689
3690 r = sd_event_prepare(e);
3691 if (r == 0)
3692 /* There was nothing? Then wait... */
3693 r = sd_event_wait(e, timeout);
3694
3695 if (e->profile_delays)
3696 e->last_run = now(CLOCK_MONOTONIC);
3697
3698 if (r > 0) {
3699 /* There's something now, then let's dispatch it */
3700 r = sd_event_dispatch(e);
3701 if (r < 0)
3702 return r;
3703
3704 return 1;
3705 }
3706
3707 return r;
3708 }
3709
3710 _public_ int sd_event_loop(sd_event *e) {
3711 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3712 int r;
3713
3714 assert_return(e, -EINVAL);
3715 assert_return(e = event_resolve(e), -ENOPKG);
3716 assert_return(!event_pid_changed(e), -ECHILD);
3717 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3718
3719 ref = sd_event_ref(e);
3720
3721 while (e->state != SD_EVENT_FINISHED) {
3722 r = sd_event_run(e, (uint64_t) -1);
3723 if (r < 0)
3724 return r;
3725 }
3726
3727 return e->exit_code;
3728 }
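/* Illustrative end-to-end sketch (minimal program, handler name hypothetical): obtain the default
   loop, wire SIGTERM up so that it terminates the loop via sd_event_exit(), and run until finished.
   The signal must be blocked before sd_event_add_signal() so that it is delivered via signalfd.

        static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
                return sd_event_exit(sd_event_source_get_event(s), 0);
        }

        int main(void) {
                _cleanup_(sd_event_unrefp) sd_event *e = NULL;
                sigset_t ss;
                int r;

                assert_se(sigemptyset(&ss) >= 0);
                assert_se(sigaddset(&ss, SIGTERM) >= 0);
                assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);

                r = sd_event_default(&e);
                if (r < 0)
                        return EXIT_FAILURE;

                r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
                if (r < 0)
                        return EXIT_FAILURE;

                return sd_event_loop(e) < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
        }
*/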
3729
3730 _public_ int sd_event_get_fd(sd_event *e) {
3731
3732 assert_return(e, -EINVAL);
3733 assert_return(e = event_resolve(e), -ENOPKG);
3734 assert_return(!event_pid_changed(e), -ECHILD);
3735
3736 return e->epoll_fd;
3737 }
3738
3739 _public_ int sd_event_get_state(sd_event *e) {
3740 assert_return(e, -EINVAL);
3741 assert_return(e = event_resolve(e), -ENOPKG);
3742 assert_return(!event_pid_changed(e), -ECHILD);
3743
3744 return e->state;
3745 }
3746
3747 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3748 assert_return(e, -EINVAL);
3749 assert_return(e = event_resolve(e), -ENOPKG);
3750 assert_return(code, -EINVAL);
3751 assert_return(!event_pid_changed(e), -ECHILD);
3752
3753 if (!e->exit_requested)
3754 return -ENODATA;
3755
3756 *code = e->exit_code;
3757 return 0;
3758 }
3759
3760 _public_ int sd_event_exit(sd_event *e, int code) {
3761 assert_return(e, -EINVAL);
3762 assert_return(e = event_resolve(e), -ENOPKG);
3763 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3764 assert_return(!event_pid_changed(e), -ECHILD);
3765
3766 e->exit_requested = true;
3767 e->exit_code = code;
3768
3769 return 0;
3770 }
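/* Illustrative sketch (hypothetical cleanup handler): sd_event_exit() only requests termination;
   exit sources registered with sd_event_add_exit() still get dispatched, in priority order, before
   sd_event_loop() returns the stored exit code.

        r = sd_event_add_exit(e, NULL, flush_pending_logs, NULL);
        if (r < 0)
                return r;
*/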
3771
3772 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3773 assert_return(e, -EINVAL);
3774 assert_return(e = event_resolve(e), -ENOPKG);
3775 assert_return(usec, -EINVAL);
3776 assert_return(!event_pid_changed(e), -ECHILD);
3777
3778 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3779 return -EOPNOTSUPP;
3780
3781 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3782 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3783 * the purpose of getting the time this doesn't matter. */
3784 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3785 return -EOPNOTSUPP;
3786
3787 if (!triple_timestamp_is_set(&e->timestamp)) {
3788 /* Implicitly fall back to now() if we never ran
3789 * before and thus have no cached time. */
3790 *usec = now(clock);
3791 return 1;
3792 }
3793
3794 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3795 return 0;
3796 }
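/* Illustrative sketch: inside event callbacks, sd_event_now() is usually preferable to calling
   now() directly, because it returns the timestamp taken when the current batch of events was
   polled (return value 0). It only falls back to a fresh sample, returning 1, before the first
   iteration.

        uint64_t t;

        r = sd_event_now(e, CLOCK_MONOTONIC, &t);
        if (r < 0)
                return r;
*/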
3797
3798 _public_ int sd_event_default(sd_event **ret) {
3799 sd_event *e = NULL;
3800 int r;
3801
3802 if (!ret)
3803 return !!default_event;
3804
3805 if (default_event) {
3806 *ret = sd_event_ref(default_event);
3807 return 0;
3808 }
3809
3810 r = sd_event_new(&e);
3811 if (r < 0)
3812 return r;
3813
3814 e->default_event_ptr = &default_event;
3815 e->tid = gettid();
3816 default_event = e;
3817
3818 *ret = e;
3819 return 1;
3820 }
3821
3822 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3823 assert_return(e, -EINVAL);
3824 assert_return(e = event_resolve(e), -ENOPKG);
3825 assert_return(tid, -EINVAL);
3826 assert_return(!event_pid_changed(e), -ECHILD);
3827
3828 if (e->tid != 0) {
3829 *tid = e->tid;
3830 return 0;
3831 }
3832
3833 return -ENXIO;
3834 }
3835
3836 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3837 int r;
3838
3839 assert_return(e, -EINVAL);
3840 assert_return(e = event_resolve(e), -ENOPKG);
3841 assert_return(!event_pid_changed(e), -ECHILD);
3842
3843 if (e->watchdog == !!b)
3844 return e->watchdog;
3845
3846 if (b) {
3847 struct epoll_event ev;
3848
3849 r = sd_watchdog_enabled(false, &e->watchdog_period);
3850 if (r <= 0)
3851 return r;
3852
3853 /* Issue first ping immediately */
3854 sd_notify(false, "WATCHDOG=1");
3855 e->watchdog_last = now(CLOCK_MONOTONIC);
3856
3857 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3858 if (e->watchdog_fd < 0)
3859 return -errno;
3860
3861 r = arm_watchdog(e);
3862 if (r < 0)
3863 goto fail;
3864
3865 ev = (struct epoll_event) {
3866 .events = EPOLLIN,
3867 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3868 };
3869
3870 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3871 if (r < 0) {
3872 r = -errno;
3873 goto fail;
3874 }
3875
3876 } else {
3877 if (e->watchdog_fd >= 0) {
3878 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3879 e->watchdog_fd = safe_close(e->watchdog_fd);
3880 }
3881 }
3882
3883 e->watchdog = !!b;
3884 return e->watchdog;
3885
3886 fail:
3887 e->watchdog_fd = safe_close(e->watchdog_fd);
3888 return r;
3889 }
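/* Illustrative sketch: a service started with WatchdogSec= (which makes the service manager export
   $WATCHDOG_USEC) can hand keep-alive pinging over to the loop entirely; arm_watchdog() above then
   schedules wake-ups between one half and three quarters of the period and sends WATCHDOG=1.

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;
        // r == 0 means no watchdog was requested in the environment; the loop runs normally.
*/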
3890
3891 _public_ int sd_event_get_watchdog(sd_event *e) {
3892 assert_return(e, -EINVAL);
3893 assert_return(e = event_resolve(e), -ENOPKG);
3894 assert_return(!event_pid_changed(e), -ECHILD);
3895
3896 return e->watchdog;
3897 }
3898
3899 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3900 assert_return(e, -EINVAL);
3901 assert_return(e = event_resolve(e), -ENOPKG);
3902 assert_return(!event_pid_changed(e), -ECHILD);
3903
3904 *ret = e->iteration;
3905 return 0;
3906 }
3907
3908 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3909 assert_return(s, -EINVAL);
3910
3911 s->destroy_callback = callback;
3912 return 0;
3913 }
3914
3915 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3916 assert_return(s, -EINVAL);
3917
3918 if (ret)
3919 *ret = s->destroy_callback;
3920
3921 return !!s->destroy_callback;
3922 }
3923
3924 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3925 assert_return(s, -EINVAL);
3926
3927 return s->floating;
3928 }
3929
3930 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3931 assert_return(s, -EINVAL);
3932
3933 if (s->floating == !!b)
3934 return 0;
3935
3936 if (!s->event) /* Already disconnected */
3937 return -ESTALE;
3938
3939 s->floating = b;
3940
3941 if (b) {
3942 sd_event_source_ref(s);
3943 sd_event_unref(s->event);
3944 } else {
3945 sd_event_ref(s->event);
3946 sd_event_source_unref(s);
3947 }
3948
3949 return 1;
3950 }