1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
39
40 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43         [SOURCE_TIME_BOOTTIME] = "boottime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
53 [SOURCE_INOTIFY] = "inotify",
54 };
55
56 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
60 struct sd_event {
61 unsigned n_ref;
62
63 int epoll_fd;
64 int watchdog_fd;
65
66 Prioq *pending;
67 Prioq *prepare;
68
69 /* timerfd_create() only supports these five clocks so far. We
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
73 struct clock_data boottime;
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
77
78 usec_t perturb;
79
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
82
83 Hashmap *child_sources;
84 unsigned n_enabled_child_sources;
85
86 Set *post_sources;
87
88 Prioq *exit;
89
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
98 pid_t original_pid;
99
100 uint64_t iteration;
101 triple_timestamp timestamp;
102 int state;
103
104 bool exit_requested:1;
105 bool need_process_child:1;
106 bool watchdog:1;
107 bool profile_delays:1;
108
109 int exit_code;
110
111 pid_t tid;
112 sd_event **default_event_ptr;
113
114 usec_t watchdog_last, watchdog_period;
115
116 unsigned n_sources;
117
118 struct epoll_event *event_queue;
119 size_t event_queue_allocated;
120
121 LIST_HEAD(sd_event_source, sources);
122
123 usec_t last_run, last_log;
124 unsigned delays[sizeof(usec_t) * 8];
125 };
126
127 static thread_local sd_event *default_event = NULL;
128
129 static void source_disconnect(sd_event_source *s);
130 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
131
132 static sd_event *event_resolve(sd_event *e) {
133 return e == SD_EVENT_DEFAULT ? default_event : e;
134 }
135
136 static int pending_prioq_compare(const void *a, const void *b) {
137 const sd_event_source *x = a, *y = b;
138 int r;
139
140 assert(x->pending);
141 assert(y->pending);
142
143 /* Enabled ones first */
144 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
145 return -1;
146 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
147 return 1;
148
149 /* Lower priority values first */
150 r = CMP(x->priority, y->priority);
151 if (r != 0)
152 return r;
153
154 /* Older entries first */
155 return CMP(x->pending_iteration, y->pending_iteration);
156 }
157
158 static int prepare_prioq_compare(const void *a, const void *b) {
159 const sd_event_source *x = a, *y = b;
160 int r;
161
162 assert(x->prepare);
163 assert(y->prepare);
164
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
167 return -1;
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
169 return 1;
170
171 /* Move most recently prepared ones last, so that we can stop
172 * preparing as soon as we hit one that has already been
173 * prepared in the current iteration */
174 r = CMP(x->prepare_iteration, y->prepare_iteration);
175 if (r != 0)
176 return r;
177
178 /* Lower priority values first */
179 return CMP(x->priority, y->priority);
180 }
181
182 static int earliest_time_prioq_compare(const void *a, const void *b) {
183 const sd_event_source *x = a, *y = b;
184
185 assert(EVENT_SOURCE_IS_TIME(x->type));
186 assert(x->type == y->type);
187
188 /* Enabled ones first */
189 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190 return -1;
191 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192 return 1;
193
194 /* Move the pending ones to the end */
195 if (!x->pending && y->pending)
196 return -1;
197 if (x->pending && !y->pending)
198 return 1;
199
200 /* Order by time */
201 return CMP(x->time.next, y->time.next);
202 }
203
204 static usec_t time_event_source_latest(const sd_event_source *s) {
205 return usec_add(s->time.next, s->time.accuracy);
206 }
207
208 static int latest_time_prioq_compare(const void *a, const void *b) {
209 const sd_event_source *x = a, *y = b;
210
211 assert(EVENT_SOURCE_IS_TIME(x->type));
212 assert(x->type == y->type);
213
214 /* Enabled ones first */
215 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
216 return -1;
217 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
218 return 1;
219
220 /* Move the pending ones to the end */
221 if (!x->pending && y->pending)
222 return -1;
223 if (x->pending && !y->pending)
224 return 1;
225
226 /* Order by time */
227 return CMP(time_event_source_latest(x), time_event_source_latest(y));
228 }
229
230 static int exit_prioq_compare(const void *a, const void *b) {
231 const sd_event_source *x = a, *y = b;
232
233 assert(x->type == SOURCE_EXIT);
234 assert(y->type == SOURCE_EXIT);
235
236 /* Enabled ones first */
237 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
238 return -1;
239 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
240 return 1;
241
242 /* Lower priority values first */
243 return CMP(x->priority, y->priority);
244 }
245
246 static void free_clock_data(struct clock_data *d) {
247 assert(d);
248 assert(d->wakeup == WAKEUP_CLOCK_DATA);
249
250 safe_close(d->fd);
251 prioq_free(d->earliest);
252 prioq_free(d->latest);
253 }
254
255 static sd_event *event_free(sd_event *e) {
256 sd_event_source *s;
257
258 assert(e);
259
260 while ((s = e->sources)) {
261 assert(s->floating);
262 source_disconnect(s);
263 sd_event_source_unref(s);
264 }
265
266 assert(e->n_sources == 0);
267
268 if (e->default_event_ptr)
269 *(e->default_event_ptr) = NULL;
270
271 safe_close(e->epoll_fd);
272 safe_close(e->watchdog_fd);
273
274 free_clock_data(&e->realtime);
275 free_clock_data(&e->boottime);
276 free_clock_data(&e->monotonic);
277 free_clock_data(&e->realtime_alarm);
278 free_clock_data(&e->boottime_alarm);
279
280 prioq_free(e->pending);
281 prioq_free(e->prepare);
282 prioq_free(e->exit);
283
284 free(e->signal_sources);
285 hashmap_free(e->signal_data);
286
287 hashmap_free(e->inotify_data);
288
289 hashmap_free(e->child_sources);
290 set_free(e->post_sources);
291
292 free(e->event_queue);
293
294 return mfree(e);
295 }
296
297 _public_ int sd_event_new(sd_event** ret) {
298 sd_event *e;
299 int r;
300
301 assert_return(ret, -EINVAL);
302
303 e = new(sd_event, 1);
304 if (!e)
305 return -ENOMEM;
306
307 *e = (sd_event) {
308 .n_ref = 1,
309 .epoll_fd = -1,
310 .watchdog_fd = -1,
311 .realtime.wakeup = WAKEUP_CLOCK_DATA,
312 .realtime.fd = -1,
313 .realtime.next = USEC_INFINITY,
314 .boottime.wakeup = WAKEUP_CLOCK_DATA,
315 .boottime.fd = -1,
316 .boottime.next = USEC_INFINITY,
317 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
318 .monotonic.fd = -1,
319 .monotonic.next = USEC_INFINITY,
320 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
321 .realtime_alarm.fd = -1,
322 .realtime_alarm.next = USEC_INFINITY,
323 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
324 .boottime_alarm.fd = -1,
325 .boottime_alarm.next = USEC_INFINITY,
326 .perturb = USEC_INFINITY,
327 .original_pid = getpid_cached(),
328 };
329
330 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
331 if (r < 0)
332 goto fail;
333
334 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
335 if (e->epoll_fd < 0) {
336 r = -errno;
337 goto fail;
338 }
339
340 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
341
342 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
343 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
344 e->profile_delays = true;
345 }
346
347 *ret = e;
348 return 0;
349
350 fail:
351 event_free(e);
352 return r;
353 }
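/* A minimal usage sketch (illustrative only, not part of the library): a typical caller allocates a loop
 * with sd_event_new() or sd_event_default(), attaches event sources, and then runs sd_event_loop(), which
 * dispatches events until sd_event_exit() is called:
 *
 *         sd_event *event = NULL;
 *         int r;
 *
 *         r = sd_event_new(&event);
 *         if (r < 0)
 *                 return r;
 *
 *         ... attach event sources here ...
 *
 *         r = sd_event_loop(event);
 *         sd_event_unref(event);
 */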
354
355 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
356
357 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
358 if (s)
359 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
360 return sd_event_source_unref(s);
361 }
362
363 static bool event_pid_changed(sd_event *e) {
364 assert(e);
365
366 /* We don't support people creating an event loop and keeping
367 * it around over a fork(). Let's complain. */
368
369 return e->original_pid != getpid_cached();
370 }
371
372 static void source_io_unregister(sd_event_source *s) {
373 assert(s);
374 assert(s->type == SOURCE_IO);
375
376 if (event_pid_changed(s->event))
377 return;
378
379 if (!s->io.registered)
380 return;
381
382 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
383 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
384 strna(s->description), event_source_type_to_string(s->type));
385
386 s->io.registered = false;
387 }
388
389 static int source_io_register(
390 sd_event_source *s,
391 int enabled,
392 uint32_t events) {
393
394 assert(s);
395 assert(s->type == SOURCE_IO);
396 assert(enabled != SD_EVENT_OFF);
397
398 struct epoll_event ev = {
399 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
400 .data.ptr = s,
401 };
402 int r;
403
404 r = epoll_ctl(s->event->epoll_fd,
405 s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
406 s->io.fd,
407 &ev);
408 if (r < 0)
409 return -errno;
410
411 s->io.registered = true;
412
413 return 0;
414 }
415
416 static void source_child_pidfd_unregister(sd_event_source *s) {
417 assert(s);
418 assert(s->type == SOURCE_CHILD);
419
420 if (event_pid_changed(s->event))
421 return;
422
423 if (!s->child.registered)
424 return;
425
426 if (EVENT_SOURCE_WATCH_PIDFD(s))
427 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
428 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
429 strna(s->description), event_source_type_to_string(s->type));
430
431 s->child.registered = false;
432 }
433
434 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
435 int r;
436
437 assert(s);
438 assert(s->type == SOURCE_CHILD);
439 assert(enabled != SD_EVENT_OFF);
440
441 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
442 struct epoll_event ev = {
443 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
444 .data.ptr = s,
445 };
446
447 if (s->child.registered)
448 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
449 else
450 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
451 if (r < 0)
452 return -errno;
453 }
454
455 s->child.registered = true;
456 return 0;
457 }
458
459 static clockid_t event_source_type_to_clock(EventSourceType t) {
460
461 switch (t) {
462
463 case SOURCE_TIME_REALTIME:
464 return CLOCK_REALTIME;
465
466 case SOURCE_TIME_BOOTTIME:
467 return CLOCK_BOOTTIME;
468
469 case SOURCE_TIME_MONOTONIC:
470 return CLOCK_MONOTONIC;
471
472 case SOURCE_TIME_REALTIME_ALARM:
473 return CLOCK_REALTIME_ALARM;
474
475 case SOURCE_TIME_BOOTTIME_ALARM:
476 return CLOCK_BOOTTIME_ALARM;
477
478 default:
479 return (clockid_t) -1;
480 }
481 }
482
483 static EventSourceType clock_to_event_source_type(clockid_t clock) {
484
485 switch (clock) {
486
487 case CLOCK_REALTIME:
488 return SOURCE_TIME_REALTIME;
489
490 case CLOCK_BOOTTIME:
491 return SOURCE_TIME_BOOTTIME;
492
493 case CLOCK_MONOTONIC:
494 return SOURCE_TIME_MONOTONIC;
495
496 case CLOCK_REALTIME_ALARM:
497 return SOURCE_TIME_REALTIME_ALARM;
498
499 case CLOCK_BOOTTIME_ALARM:
500 return SOURCE_TIME_BOOTTIME_ALARM;
501
502 default:
503 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
504 }
505 }
506
507 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
508 assert(e);
509
510 switch (t) {
511
512 case SOURCE_TIME_REALTIME:
513 return &e->realtime;
514
515 case SOURCE_TIME_BOOTTIME:
516 return &e->boottime;
517
518 case SOURCE_TIME_MONOTONIC:
519 return &e->monotonic;
520
521 case SOURCE_TIME_REALTIME_ALARM:
522 return &e->realtime_alarm;
523
524 case SOURCE_TIME_BOOTTIME_ALARM:
525 return &e->boottime_alarm;
526
527 default:
528 return NULL;
529 }
530 }
531
532 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
533 assert(e);
534
535 if (!d)
536 return;
537
538 hashmap_remove(e->signal_data, &d->priority);
539 safe_close(d->fd);
540 free(d);
541 }
542
543 static int event_make_signal_data(
544 sd_event *e,
545 int sig,
546 struct signal_data **ret) {
547
548 struct signal_data *d;
549 bool added = false;
550 sigset_t ss_copy;
551 int64_t priority;
552 int r;
553
554 assert(e);
555
556 if (event_pid_changed(e))
557 return -ECHILD;
558
559 if (e->signal_sources && e->signal_sources[sig])
560 priority = e->signal_sources[sig]->priority;
561 else
562 priority = SD_EVENT_PRIORITY_NORMAL;
563
564 d = hashmap_get(e->signal_data, &priority);
565 if (d) {
566 if (sigismember(&d->sigset, sig) > 0) {
567 if (ret)
568 *ret = d;
569 return 0;
570 }
571 } else {
572 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
573 if (r < 0)
574 return r;
575
576 d = new(struct signal_data, 1);
577 if (!d)
578 return -ENOMEM;
579
580 *d = (struct signal_data) {
581 .wakeup = WAKEUP_SIGNAL_DATA,
582 .fd = -1,
583 .priority = priority,
584 };
585
586 r = hashmap_put(e->signal_data, &d->priority, d);
587 if (r < 0) {
588 free(d);
589 return r;
590 }
591
592 added = true;
593 }
594
595 ss_copy = d->sigset;
596 assert_se(sigaddset(&ss_copy, sig) >= 0);
597
598 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
599 if (r < 0) {
600 r = -errno;
601 goto fail;
602 }
603
604 d->sigset = ss_copy;
605
606 if (d->fd >= 0) {
607 if (ret)
608 *ret = d;
609 return 0;
610 }
611
612 d->fd = fd_move_above_stdio(r);
613
614 struct epoll_event ev = {
615 .events = EPOLLIN,
616 .data.ptr = d,
617 };
618
619 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
620 if (r < 0) {
621 r = -errno;
622 goto fail;
623 }
624
625 if (ret)
626 *ret = d;
627
628 return 0;
629
630 fail:
631 if (added)
632 event_free_signal_data(e, d);
633
634 return r;
635 }
636
637 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
638 assert(e);
639 assert(d);
640
641         /* Turns off the specified signal in the signal data
642          * object. If the signal mask of the object becomes empty that
643          * way, the object is removed as well. */
644
645 if (sigismember(&d->sigset, sig) == 0)
646 return;
647
648 assert_se(sigdelset(&d->sigset, sig) >= 0);
649
650 if (sigisemptyset(&d->sigset)) {
651                 /* If the mask is all-zero we can get rid of the structure */
652 event_free_signal_data(e, d);
653 return;
654 }
655
656 assert(d->fd >= 0);
657
658 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
659 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
660 }
661
662 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
663 struct signal_data *d;
664 static const int64_t zero_priority = 0;
665
666 assert(e);
667
668 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
669 * and possibly drop the signalfd for it. */
670
671 if (sig == SIGCHLD &&
672 e->n_enabled_child_sources > 0)
673 return;
674
675 if (e->signal_sources &&
676 e->signal_sources[sig] &&
677 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
678 return;
679
680 /*
681 * The specified signal might be enabled in three different queues:
682 *
683 * 1) the one that belongs to the priority passed (if it is non-NULL)
684 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
685 * 3) the 0 priority (to cover the SIGCHLD case)
686 *
687 * Hence, let's remove it from all three here.
688 */
689
690 if (priority) {
691 d = hashmap_get(e->signal_data, priority);
692 if (d)
693 event_unmask_signal_data(e, d, sig);
694 }
695
696 if (e->signal_sources && e->signal_sources[sig]) {
697 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
698 if (d)
699 event_unmask_signal_data(e, d, sig);
700 }
701
702 d = hashmap_get(e->signal_data, &zero_priority);
703 if (d)
704 event_unmask_signal_data(e, d, sig);
705 }
706
707 static void source_disconnect(sd_event_source *s) {
708 sd_event *event;
709
710 assert(s);
711
712 if (!s->event)
713 return;
714
715 assert(s->event->n_sources > 0);
716
717 switch (s->type) {
718
719 case SOURCE_IO:
720 if (s->io.fd >= 0)
721 source_io_unregister(s);
722
723 break;
724
725 case SOURCE_TIME_REALTIME:
726 case SOURCE_TIME_BOOTTIME:
727 case SOURCE_TIME_MONOTONIC:
728 case SOURCE_TIME_REALTIME_ALARM:
729 case SOURCE_TIME_BOOTTIME_ALARM: {
730 struct clock_data *d;
731
732 d = event_get_clock_data(s->event, s->type);
733 assert(d);
734
735 prioq_remove(d->earliest, s, &s->time.earliest_index);
736 prioq_remove(d->latest, s, &s->time.latest_index);
737 d->needs_rearm = true;
738 break;
739 }
740
741 case SOURCE_SIGNAL:
742 if (s->signal.sig > 0) {
743
744 if (s->event->signal_sources)
745 s->event->signal_sources[s->signal.sig] = NULL;
746
747 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
748 }
749
750 break;
751
752 case SOURCE_CHILD:
753 if (s->child.pid > 0) {
754 if (s->enabled != SD_EVENT_OFF) {
755 assert(s->event->n_enabled_child_sources > 0);
756 s->event->n_enabled_child_sources--;
757 }
758
759 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
760 }
761
762 if (EVENT_SOURCE_WATCH_PIDFD(s))
763 source_child_pidfd_unregister(s);
764 else
765 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
766
767 break;
768
769 case SOURCE_DEFER:
770 /* nothing */
771 break;
772
773 case SOURCE_POST:
774 set_remove(s->event->post_sources, s);
775 break;
776
777 case SOURCE_EXIT:
778 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
779 break;
780
781 case SOURCE_INOTIFY: {
782 struct inode_data *inode_data;
783
784 inode_data = s->inotify.inode_data;
785 if (inode_data) {
786 struct inotify_data *inotify_data;
787 assert_se(inotify_data = inode_data->inotify_data);
788
789 /* Detach this event source from the inode object */
790 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
791 s->inotify.inode_data = NULL;
792
793 if (s->pending) {
794 assert(inotify_data->n_pending > 0);
795 inotify_data->n_pending--;
796 }
797
798                         /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
799                          * continues to be watched. That's because inotify doesn't really have an API for that: we
800 * can only change watch masks with access to the original inode either by fd or by path. But
801 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
802 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
803 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
804 * there), but given the need for open_by_handle_at() which is privileged and not universally
805 * available this would be quite an incomplete solution. Hence we go the other way, leave the
806 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
807 * anymore after reception. Yes, this sucks, but … Linux … */
808
809 /* Maybe release the inode data (and its inotify) */
810 event_gc_inode_data(s->event, inode_data);
811 }
812
813 break;
814 }
815
816 default:
817 assert_not_reached("Wut? I shouldn't exist.");
818 }
819
820 if (s->pending)
821 prioq_remove(s->event->pending, s, &s->pending_index);
822
823 if (s->prepare)
824 prioq_remove(s->event->prepare, s, &s->prepare_index);
825
826 event = TAKE_PTR(s->event);
827 LIST_REMOVE(sources, event->sources, s);
828 event->n_sources--;
829
830 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
831 * pidfd associated with this event source, which we'll do only on source_free(). */
832
833 if (!s->floating)
834 sd_event_unref(event);
835 }
836
837 static void source_free(sd_event_source *s) {
838 assert(s);
839
840 source_disconnect(s);
841
842 if (s->type == SOURCE_IO && s->io.owned)
843 s->io.fd = safe_close(s->io.fd);
844
845 if (s->type == SOURCE_CHILD) {
846 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
847
848 if (s->child.process_owned) {
849
850 if (!s->child.exited) {
851 bool sent = false;
852
853 if (s->child.pidfd >= 0) {
854 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
855 if (errno == ESRCH) /* Already dead */
856 sent = true;
857 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
858 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
859 s->child.pid);
860 } else
861 sent = true;
862 }
863
864 if (!sent)
865 if (kill(s->child.pid, SIGKILL) < 0)
866 if (errno != ESRCH) /* Already dead */
867 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
868 s->child.pid);
869 }
870
871 if (!s->child.waited) {
872 siginfo_t si = {};
873
874 /* Reap the child if we can */
875 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
876 }
877 }
878
879 if (s->child.pidfd_owned)
880 s->child.pidfd = safe_close(s->child.pidfd);
881 }
882
883 if (s->destroy_callback)
884 s->destroy_callback(s->userdata);
885
886 free(s->description);
887 free(s);
888 }
889 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
890
891 static int source_set_pending(sd_event_source *s, bool b) {
892 int r;
893
894 assert(s);
895 assert(s->type != SOURCE_EXIT);
896
897 if (s->pending == b)
898 return 0;
899
900 s->pending = b;
901
902 if (b) {
903 s->pending_iteration = s->event->iteration;
904
905 r = prioq_put(s->event->pending, s, &s->pending_index);
906 if (r < 0) {
907 s->pending = false;
908 return r;
909 }
910 } else
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
912
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
915
916 d = event_get_clock_data(s->event, s->type);
917 assert(d);
918
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
922 }
923
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
926
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
929 d->current = NULL;
930 }
931
932 if (s->type == SOURCE_INOTIFY) {
933
934 assert(s->inotify.inode_data);
935 assert(s->inotify.inode_data->inotify_data);
936
937 if (b)
938 s->inotify.inode_data->inotify_data->n_pending ++;
939 else {
940 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
941 s->inotify.inode_data->inotify_data->n_pending --;
942 }
943 }
944
945 return 0;
946 }
947
948 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
949 sd_event_source *s;
950
951 assert(e);
952
953 s = new(sd_event_source, 1);
954 if (!s)
955 return NULL;
956
957 *s = (struct sd_event_source) {
958 .n_ref = 1,
959 .event = e,
960 .floating = floating,
961 .type = type,
962 .pending_index = PRIOQ_IDX_NULL,
963 .prepare_index = PRIOQ_IDX_NULL,
964 };
965
966 if (!floating)
967 sd_event_ref(e);
968
969 LIST_PREPEND(sources, e->sources, s);
970 e->n_sources++;
971
972 return s;
973 }
974
975 _public_ int sd_event_add_io(
976 sd_event *e,
977 sd_event_source **ret,
978 int fd,
979 uint32_t events,
980 sd_event_io_handler_t callback,
981 void *userdata) {
982
983 _cleanup_(source_freep) sd_event_source *s = NULL;
984 int r;
985
986 assert_return(e, -EINVAL);
987 assert_return(e = event_resolve(e), -ENOPKG);
988 assert_return(fd >= 0, -EBADF);
989 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
990 assert_return(callback, -EINVAL);
991 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992 assert_return(!event_pid_changed(e), -ECHILD);
993
994 s = source_new(e, !ret, SOURCE_IO);
995 if (!s)
996 return -ENOMEM;
997
998 s->wakeup = WAKEUP_EVENT_SOURCE;
999 s->io.fd = fd;
1000 s->io.events = events;
1001 s->io.callback = callback;
1002 s->userdata = userdata;
1003 s->enabled = SD_EVENT_ON;
1004
1005 r = source_io_register(s, s->enabled, events);
1006 if (r < 0)
1007 return r;
1008
1009 if (ret)
1010 *ret = s;
1011 TAKE_PTR(s);
1012
1013 return 0;
1014 }
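/* A minimal usage sketch for sd_event_add_io() (illustrative only; the fd and the on_io handler are
 * hypothetical). The handler signature is sd_event_io_handler_t, as used above:
 *
 *         static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *                 if (revents & EPOLLIN) {
 *                         ... read from fd until it would block ...
 *                 }
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_io(event, &source, fd, EPOLLIN, on_io, NULL);
 */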
1015
1016 static void initialize_perturb(sd_event *e) {
1017 sd_id128_t bootid = {};
1018
1019 /* When we sleep for longer, we try to realign the wakeup to
1020 the same time within each minute/second/250ms, so that
1021 events all across the system can be coalesced into a single
1022 CPU wakeup. However, let's take some system-specific
1023 randomness for this value, so that in a network of systems
1024 with synced clocks timer events are distributed a
1025 bit. Here, we calculate a perturbation usec offset from the
1026 boot ID. */
1027
1028 if (_likely_(e->perturb != USEC_INFINITY))
1029 return;
1030
1031 if (sd_id128_get_boot(&bootid) >= 0)
1032 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1033 }
1034
1035 static int event_setup_timer_fd(
1036 sd_event *e,
1037 struct clock_data *d,
1038 clockid_t clock) {
1039
1040 assert(e);
1041 assert(d);
1042
1043 if (_likely_(d->fd >= 0))
1044 return 0;
1045
1046 _cleanup_close_ int fd = -1;
1047 int r;
1048
1049 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1050 if (fd < 0)
1051 return -errno;
1052
1053 fd = fd_move_above_stdio(fd);
1054
1055 struct epoll_event ev = {
1056 .events = EPOLLIN,
1057 .data.ptr = d,
1058 };
1059
1060 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1061 if (r < 0)
1062 return -errno;
1063
1064 d->fd = TAKE_FD(fd);
1065 return 0;
1066 }
1067
1068 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1069 assert(s);
1070
1071 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1072 }
1073
1074 _public_ int sd_event_add_time(
1075 sd_event *e,
1076 sd_event_source **ret,
1077 clockid_t clock,
1078 uint64_t usec,
1079 uint64_t accuracy,
1080 sd_event_time_handler_t callback,
1081 void *userdata) {
1082
1083 EventSourceType type;
1084 _cleanup_(source_freep) sd_event_source *s = NULL;
1085 struct clock_data *d;
1086 int r;
1087
1088 assert_return(e, -EINVAL);
1089 assert_return(e = event_resolve(e), -ENOPKG);
1090 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1091 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1092 assert_return(!event_pid_changed(e), -ECHILD);
1093
1094 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1095 return -EOPNOTSUPP;
1096
1097 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1098 if (type < 0)
1099 return -EOPNOTSUPP;
1100
1101 if (!callback)
1102 callback = time_exit_callback;
1103
1104 d = event_get_clock_data(e, type);
1105 assert(d);
1106
1107 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1108 if (r < 0)
1109 return r;
1110
1111 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1112 if (r < 0)
1113 return r;
1114
1115 if (d->fd < 0) {
1116 r = event_setup_timer_fd(e, d, clock);
1117 if (r < 0)
1118 return r;
1119 }
1120
1121 s = source_new(e, !ret, type);
1122 if (!s)
1123 return -ENOMEM;
1124
1125 s->time.next = usec;
1126 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1127 s->time.callback = callback;
1128 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1129 s->userdata = userdata;
1130 s->enabled = SD_EVENT_ONESHOT;
1131
1132 d->needs_rearm = true;
1133
1134 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1135 if (r < 0)
1136 return r;
1137
1138 r = prioq_put(d->latest, s, &s->time.latest_index);
1139 if (r < 0)
1140 return r;
1141
1142 if (ret)
1143 *ret = s;
1144 TAKE_PTR(s);
1145
1146 return 0;
1147 }
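/* A minimal usage sketch for sd_event_add_time() (illustrative only; on_timer is hypothetical): arm a
 * one-shot timer roughly five seconds from now on CLOCK_MONOTONIC, allowing one second of accuracy so the
 * wakeup can be coalesced with others. Passing 0 as accuracy selects DEFAULT_ACCURACY_USEC instead:
 *
 *         static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_time(event, &source, CLOCK_MONOTONIC,
 *                               now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC, USEC_PER_SEC,
 *                               on_timer, NULL);
 */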
1148
1149 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1150 assert(s);
1151
1152 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1153 }
1154
1155 _public_ int sd_event_add_signal(
1156 sd_event *e,
1157 sd_event_source **ret,
1158 int sig,
1159 sd_event_signal_handler_t callback,
1160 void *userdata) {
1161
1162 _cleanup_(source_freep) sd_event_source *s = NULL;
1163 struct signal_data *d;
1164 int r;
1165
1166 assert_return(e, -EINVAL);
1167 assert_return(e = event_resolve(e), -ENOPKG);
1168 assert_return(SIGNAL_VALID(sig), -EINVAL);
1169 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1170 assert_return(!event_pid_changed(e), -ECHILD);
1171
1172 if (!callback)
1173 callback = signal_exit_callback;
1174
1175 r = signal_is_blocked(sig);
1176 if (r < 0)
1177 return r;
1178 if (r == 0)
1179 return -EBUSY;
1180
1181 if (!e->signal_sources) {
1182 e->signal_sources = new0(sd_event_source*, _NSIG);
1183 if (!e->signal_sources)
1184 return -ENOMEM;
1185 } else if (e->signal_sources[sig])
1186 return -EBUSY;
1187
1188 s = source_new(e, !ret, SOURCE_SIGNAL);
1189 if (!s)
1190 return -ENOMEM;
1191
1192 s->signal.sig = sig;
1193 s->signal.callback = callback;
1194 s->userdata = userdata;
1195 s->enabled = SD_EVENT_ON;
1196
1197 e->signal_sources[sig] = s;
1198
1199 r = event_make_signal_data(e, sig, &d);
1200 if (r < 0)
1201 return r;
1202
1203 /* Use the signal name as description for the event source by default */
1204 (void) sd_event_source_set_description(s, signal_to_string(sig));
1205
1206 if (ret)
1207 *ret = s;
1208 TAKE_PTR(s);
1209
1210 return 0;
1211 }
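/* A minimal usage sketch for sd_event_add_signal() (illustrative only): as enforced by the
 * signal_is_blocked() check above, the caller must block the signal before adding the source, otherwise
 * -EBUSY is returned (use pthread_sigmask() instead of sigprocmask() in threaded programs). Passing a NULL
 * callback installs signal_exit_callback(), i.e. the loop exits when the signal arrives:
 *
 *         sigset_t ss;
 *
 *         sigemptyset(&ss);
 *         sigaddset(&ss, SIGTERM);
 *         sigprocmask(SIG_BLOCK, &ss, NULL);
 *
 *         r = sd_event_add_signal(event, &source, SIGTERM, NULL, NULL);
 */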
1212
1213 static bool shall_use_pidfd(void) {
1214 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1215 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1216 }
1217
1218 _public_ int sd_event_add_child(
1219 sd_event *e,
1220 sd_event_source **ret,
1221 pid_t pid,
1222 int options,
1223 sd_event_child_handler_t callback,
1224 void *userdata) {
1225
1226 _cleanup_(source_freep) sd_event_source *s = NULL;
1227 int r;
1228
1229 assert_return(e, -EINVAL);
1230 assert_return(e = event_resolve(e), -ENOPKG);
1231 assert_return(pid > 1, -EINVAL);
1232 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1233 assert_return(options != 0, -EINVAL);
1234 assert_return(callback, -EINVAL);
1235 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1236 assert_return(!event_pid_changed(e), -ECHILD);
1237
1238 if (e->n_enabled_child_sources == 0) {
1239 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1240                  * for compatibility with pre-pidfd and because we don't want to reap the child processes
1241 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1242 * take effect.
1243 *
1244 * (As an optimization we only do this check on the first child event source created.) */
1245 r = signal_is_blocked(SIGCHLD);
1246 if (r < 0)
1247 return r;
1248 if (r == 0)
1249 return -EBUSY;
1250 }
1251
1252 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1253 if (r < 0)
1254 return r;
1255
1256 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1257 return -EBUSY;
1258
1259 s = source_new(e, !ret, SOURCE_CHILD);
1260 if (!s)
1261 return -ENOMEM;
1262
1263 s->wakeup = WAKEUP_EVENT_SOURCE;
1264 s->child.pid = pid;
1265 s->child.options = options;
1266 s->child.callback = callback;
1267 s->userdata = userdata;
1268 s->enabled = SD_EVENT_ONESHOT;
1269
1270         /* We always take a pidfd here if we can, even if we wait for something other than WEXITED, so that we
1271 * pin the PID, and make regular waitid() handling race-free. */
1272
1273 if (shall_use_pidfd()) {
1274 s->child.pidfd = pidfd_open(s->child.pid, 0);
1275 if (s->child.pidfd < 0) {
1276 /* Propagate errors unless the syscall is not supported or blocked */
1277 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1278 return -errno;
1279 } else
1280 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1281 } else
1282 s->child.pidfd = -1;
1283
1284 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1285 if (r < 0)
1286 return r;
1287
1288 e->n_enabled_child_sources++;
1289
1290 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1291 /* We have a pidfd and we only want to watch for exit */
1292
1293 r = source_child_pidfd_register(s, s->enabled);
1294 if (r < 0) {
1295 e->n_enabled_child_sources--;
1296 return r;
1297 }
1298 } else {
1299                 /* We have no pidfd or we shall wait for some event other than WEXITED */
1300
1301 r = event_make_signal_data(e, SIGCHLD, NULL);
1302 if (r < 0) {
1303 e->n_enabled_child_sources--;
1304 return r;
1305 }
1306
1307 e->need_process_child = true;
1308 }
1309
1310 if (ret)
1311 *ret = s;
1312
1313 TAKE_PTR(s);
1314 return 0;
1315 }
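/* A minimal usage sketch for sd_event_add_child() (illustrative only; child_pid and on_child are
 * hypothetical). As checked above, SIGCHLD must be blocked before the first child source is added, even
 * when pidfds are available. The handler signature is sd_event_child_handler_t:
 *
 *         static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *                 // si->si_pid, si->si_code and si->si_status describe what happened to the child
 *                 return 0;
 *         }
 *
 *         sigset_t ss;
 *         sigemptyset(&ss);
 *         sigaddset(&ss, SIGCHLD);
 *         sigprocmask(SIG_BLOCK, &ss, NULL);
 *
 *         r = sd_event_add_child(event, &source, child_pid, WEXITED, on_child, NULL);
 */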
1316
1317 _public_ int sd_event_add_child_pidfd(
1318 sd_event *e,
1319 sd_event_source **ret,
1320 int pidfd,
1321 int options,
1322 sd_event_child_handler_t callback,
1323 void *userdata) {
1324
1325
1326 _cleanup_(source_freep) sd_event_source *s = NULL;
1327 pid_t pid;
1328 int r;
1329
1330 assert_return(e, -EINVAL);
1331 assert_return(e = event_resolve(e), -ENOPKG);
1332 assert_return(pidfd >= 0, -EBADF);
1333 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1334 assert_return(options != 0, -EINVAL);
1335 assert_return(callback, -EINVAL);
1336 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1337 assert_return(!event_pid_changed(e), -ECHILD);
1338
1339 if (e->n_enabled_child_sources == 0) {
1340 r = signal_is_blocked(SIGCHLD);
1341 if (r < 0)
1342 return r;
1343 if (r == 0)
1344 return -EBUSY;
1345 }
1346
1347 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1348 if (r < 0)
1349 return r;
1350
1351 r = pidfd_get_pid(pidfd, &pid);
1352 if (r < 0)
1353 return r;
1354
1355 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1356 return -EBUSY;
1357
1358 s = source_new(e, !ret, SOURCE_CHILD);
1359 if (!s)
1360 return -ENOMEM;
1361
1362 s->wakeup = WAKEUP_EVENT_SOURCE;
1363 s->child.pidfd = pidfd;
1364 s->child.pid = pid;
1365 s->child.options = options;
1366 s->child.callback = callback;
1367 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1368 s->userdata = userdata;
1369 s->enabled = SD_EVENT_ONESHOT;
1370
1371 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1372 if (r < 0)
1373 return r;
1374
1375 e->n_enabled_child_sources++;
1376
1377 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1378 /* We only want to watch for WEXITED */
1379
1380 r = source_child_pidfd_register(s, s->enabled);
1381 if (r < 0) {
1382 e->n_enabled_child_sources--;
1383 return r;
1384 }
1385 } else {
1386                 /* We shall wait for some event other than WEXITED */
1387
1388 r = event_make_signal_data(e, SIGCHLD, NULL);
1389 if (r < 0) {
1390 e->n_enabled_child_sources--;
1391 return r;
1392 }
1393
1394 e->need_process_child = true;
1395 }
1396
1397 if (ret)
1398 *ret = s;
1399
1400 TAKE_PTR(s);
1401 return 0;
1402 }
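/* A usage sketch for sd_event_add_child_pidfd() (illustrative only): same as sd_event_add_child(), except
 * that the caller supplies an already open pidfd (e.g. obtained via pidfd_open(2)), which the event source
 * does not own by default:
 *
 *         int pidfd = pidfd_open(child_pid, 0);
 *         if (pidfd < 0)
 *                 return -errno;
 *
 *         r = sd_event_add_child_pidfd(event, &source, pidfd, WEXITED, on_child, NULL);
 */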
1403
1404 _public_ int sd_event_add_defer(
1405 sd_event *e,
1406 sd_event_source **ret,
1407 sd_event_handler_t callback,
1408 void *userdata) {
1409
1410 _cleanup_(source_freep) sd_event_source *s = NULL;
1411 int r;
1412
1413 assert_return(e, -EINVAL);
1414 assert_return(e = event_resolve(e), -ENOPKG);
1415 assert_return(callback, -EINVAL);
1416 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1417 assert_return(!event_pid_changed(e), -ECHILD);
1418
1419 s = source_new(e, !ret, SOURCE_DEFER);
1420 if (!s)
1421 return -ENOMEM;
1422
1423 s->defer.callback = callback;
1424 s->userdata = userdata;
1425 s->enabled = SD_EVENT_ONESHOT;
1426
1427 r = source_set_pending(s, true);
1428 if (r < 0)
1429 return r;
1430
1431 if (ret)
1432 *ret = s;
1433 TAKE_PTR(s);
1434
1435 return 0;
1436 }
1437
1438 _public_ int sd_event_add_post(
1439 sd_event *e,
1440 sd_event_source **ret,
1441 sd_event_handler_t callback,
1442 void *userdata) {
1443
1444 _cleanup_(source_freep) sd_event_source *s = NULL;
1445 int r;
1446
1447 assert_return(e, -EINVAL);
1448 assert_return(e = event_resolve(e), -ENOPKG);
1449 assert_return(callback, -EINVAL);
1450 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1451 assert_return(!event_pid_changed(e), -ECHILD);
1452
1453 s = source_new(e, !ret, SOURCE_POST);
1454 if (!s)
1455 return -ENOMEM;
1456
1457 s->post.callback = callback;
1458 s->userdata = userdata;
1459 s->enabled = SD_EVENT_ON;
1460
1461 r = set_ensure_put(&e->post_sources, NULL, s);
1462 if (r < 0)
1463 return r;
1464 assert(r > 0);
1465
1466 if (ret)
1467 *ret = s;
1468 TAKE_PTR(s);
1469
1470 return 0;
1471 }
1472
1473 _public_ int sd_event_add_exit(
1474 sd_event *e,
1475 sd_event_source **ret,
1476 sd_event_handler_t callback,
1477 void *userdata) {
1478
1479 _cleanup_(source_freep) sd_event_source *s = NULL;
1480 int r;
1481
1482 assert_return(e, -EINVAL);
1483 assert_return(e = event_resolve(e), -ENOPKG);
1484 assert_return(callback, -EINVAL);
1485 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1486 assert_return(!event_pid_changed(e), -ECHILD);
1487
1488 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1489 if (r < 0)
1490 return r;
1491
1492 s = source_new(e, !ret, SOURCE_EXIT);
1493 if (!s)
1494 return -ENOMEM;
1495
1496 s->exit.callback = callback;
1497 s->userdata = userdata;
1498 s->exit.prioq_index = PRIOQ_IDX_NULL;
1499 s->enabled = SD_EVENT_ONESHOT;
1500
1501 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1502 if (r < 0)
1503 return r;
1504
1505 if (ret)
1506 *ret = s;
1507 TAKE_PTR(s);
1508
1509 return 0;
1510 }
1511
1512 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1513 assert(e);
1514
1515 if (!d)
1516 return;
1517
1518 assert(hashmap_isempty(d->inodes));
1519 assert(hashmap_isempty(d->wd));
1520
1521 if (d->buffer_filled > 0)
1522 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1523
1524 hashmap_free(d->inodes);
1525 hashmap_free(d->wd);
1526
1527 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1528
1529 if (d->fd >= 0) {
1530 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1531 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1532
1533 safe_close(d->fd);
1534 }
1535 free(d);
1536 }
1537
1538 static int event_make_inotify_data(
1539 sd_event *e,
1540 int64_t priority,
1541 struct inotify_data **ret) {
1542
1543 _cleanup_close_ int fd = -1;
1544 struct inotify_data *d;
1545 int r;
1546
1547 assert(e);
1548
1549 d = hashmap_get(e->inotify_data, &priority);
1550 if (d) {
1551 if (ret)
1552 *ret = d;
1553 return 0;
1554 }
1555
1556         fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1557 if (fd < 0)
1558 return -errno;
1559
1560 fd = fd_move_above_stdio(fd);
1561
1562 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1563 if (r < 0)
1564 return r;
1565
1566 d = new(struct inotify_data, 1);
1567 if (!d)
1568 return -ENOMEM;
1569
1570 *d = (struct inotify_data) {
1571 .wakeup = WAKEUP_INOTIFY_DATA,
1572 .fd = TAKE_FD(fd),
1573 .priority = priority,
1574 };
1575
1576 r = hashmap_put(e->inotify_data, &d->priority, d);
1577 if (r < 0) {
1578 d->fd = safe_close(d->fd);
1579 free(d);
1580 return r;
1581 }
1582
1583 struct epoll_event ev = {
1584 .events = EPOLLIN,
1585 .data.ptr = d,
1586 };
1587
1588 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1589 r = -errno;
1590 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1591 * remove the fd from the epoll first, which we don't want as we couldn't
1592 * add it in the first place. */
1593 event_free_inotify_data(e, d);
1594 return r;
1595 }
1596
1597 if (ret)
1598 *ret = d;
1599
1600 return 1;
1601 }
1602
1603 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1604 int r;
1605
1606 assert(x);
1607 assert(y);
1608
1609 r = CMP(x->dev, y->dev);
1610 if (r != 0)
1611 return r;
1612
1613 return CMP(x->ino, y->ino);
1614 }
1615
1616 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1617 assert(d);
1618
1619 siphash24_compress(&d->dev, sizeof(d->dev), state);
1620 siphash24_compress(&d->ino, sizeof(d->ino), state);
1621 }
1622
1623 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1624
1625 static void event_free_inode_data(
1626 sd_event *e,
1627 struct inode_data *d) {
1628
1629 assert(e);
1630
1631 if (!d)
1632 return;
1633
1634 assert(!d->event_sources);
1635
1636 if (d->fd >= 0) {
1637 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1638 safe_close(d->fd);
1639 }
1640
1641 if (d->inotify_data) {
1642
1643 if (d->wd >= 0) {
1644 if (d->inotify_data->fd >= 0) {
1645 /* So here's a problem. At the time this runs the watch descriptor might already be
1646                          * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1647 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1648 * likely case to happen. */
1649
1650 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1651 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1652 }
1653
1654 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1655 }
1656
1657 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1658 }
1659
1660 free(d);
1661 }
1662
1663 static void event_gc_inode_data(
1664 sd_event *e,
1665 struct inode_data *d) {
1666
1667 struct inotify_data *inotify_data;
1668
1669 assert(e);
1670
1671 if (!d)
1672 return;
1673
1674 if (d->event_sources)
1675 return;
1676
1677 inotify_data = d->inotify_data;
1678 event_free_inode_data(e, d);
1679
1680 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1681 event_free_inotify_data(e, inotify_data);
1682 }
1683
1684 static int event_make_inode_data(
1685 sd_event *e,
1686 struct inotify_data *inotify_data,
1687 dev_t dev,
1688 ino_t ino,
1689 struct inode_data **ret) {
1690
1691 struct inode_data *d, key;
1692 int r;
1693
1694 assert(e);
1695 assert(inotify_data);
1696
1697 key = (struct inode_data) {
1698 .ino = ino,
1699 .dev = dev,
1700 };
1701
1702 d = hashmap_get(inotify_data->inodes, &key);
1703 if (d) {
1704 if (ret)
1705 *ret = d;
1706
1707 return 0;
1708 }
1709
1710 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1711 if (r < 0)
1712 return r;
1713
1714 d = new(struct inode_data, 1);
1715 if (!d)
1716 return -ENOMEM;
1717
1718 *d = (struct inode_data) {
1719 .dev = dev,
1720 .ino = ino,
1721 .wd = -1,
1722 .fd = -1,
1723 .inotify_data = inotify_data,
1724 };
1725
1726 r = hashmap_put(inotify_data->inodes, d, d);
1727 if (r < 0) {
1728 free(d);
1729 return r;
1730 }
1731
1732 if (ret)
1733 *ret = d;
1734
1735 return 1;
1736 }
1737
1738 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1739 bool excl_unlink = true;
1740 uint32_t combined = 0;
1741 sd_event_source *s;
1742
1743 assert(d);
1744
1745 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1746 * the IN_EXCL_UNLINK flag is ANDed instead.
1747 *
1748          * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1749 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1750 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1751 * events we don't care for client-side. */
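        /* For example (illustrative only): if one source watches an inode with IN_CLOSE_WRITE|IN_EXCL_UNLINK
         * and another source watches the same inode with IN_MOVED_TO, the realized mask becomes
         * IN_CLOSE_WRITE|IN_MOVED_TO: the per-source bits are ORed together, while IN_EXCL_UNLINK is dropped
         * because not every source requested it. */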
1752
1753 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1754
1755 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1756 excl_unlink = false;
1757
1758 combined |= s->inotify.mask;
1759 }
1760
1761 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1762 }
1763
1764 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1765 uint32_t combined_mask;
1766 int wd, r;
1767
1768 assert(d);
1769 assert(d->fd >= 0);
1770
1771 combined_mask = inode_data_determine_mask(d);
1772
1773 if (d->wd >= 0 && combined_mask == d->combined_mask)
1774 return 0;
1775
1776 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1777 if (r < 0)
1778 return r;
1779
1780 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1781 if (wd < 0)
1782 return -errno;
1783
1784 if (d->wd < 0) {
1785 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1786 if (r < 0) {
1787 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1788 return r;
1789 }
1790
1791 d->wd = wd;
1792
1793 } else if (d->wd != wd) {
1794
1795 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1796 (void) inotify_rm_watch(d->fd, wd);
1797 return -EINVAL;
1798 }
1799
1800 d->combined_mask = combined_mask;
1801 return 1;
1802 }
1803
1804 _public_ int sd_event_add_inotify(
1805 sd_event *e,
1806 sd_event_source **ret,
1807 const char *path,
1808 uint32_t mask,
1809 sd_event_inotify_handler_t callback,
1810 void *userdata) {
1811
1812 struct inotify_data *inotify_data = NULL;
1813 struct inode_data *inode_data = NULL;
1814 _cleanup_close_ int fd = -1;
1815 _cleanup_(source_freep) sd_event_source *s = NULL;
1816 struct stat st;
1817 int r;
1818
1819 assert_return(e, -EINVAL);
1820 assert_return(e = event_resolve(e), -ENOPKG);
1821 assert_return(path, -EINVAL);
1822 assert_return(callback, -EINVAL);
1823 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1824 assert_return(!event_pid_changed(e), -ECHILD);
1825
1826 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1827 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1828 * the user can't use them for us. */
1829 if (mask & IN_MASK_ADD)
1830 return -EINVAL;
1831
1832 fd = open(path, O_PATH|O_CLOEXEC|
1833 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1834 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1835 if (fd < 0)
1836 return -errno;
1837
1838 if (fstat(fd, &st) < 0)
1839 return -errno;
1840
1841 s = source_new(e, !ret, SOURCE_INOTIFY);
1842 if (!s)
1843 return -ENOMEM;
1844
1845 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1846 s->inotify.mask = mask;
1847 s->inotify.callback = callback;
1848 s->userdata = userdata;
1849
1850 /* Allocate an inotify object for this priority, and an inode object within it */
1851 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1852 if (r < 0)
1853 return r;
1854
1855 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1856 if (r < 0) {
1857 event_free_inotify_data(e, inotify_data);
1858 return r;
1859 }
1860
1861         /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1862          * the event source until then, since for that we need the original inode. */
1863 if (inode_data->fd < 0) {
1864 inode_data->fd = TAKE_FD(fd);
1865 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1866 }
1867
1868 /* Link our event source to the inode data object */
1869 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1870 s->inotify.inode_data = inode_data;
1871
1872 /* Actually realize the watch now */
1873 r = inode_data_realize_watch(e, inode_data);
1874 if (r < 0)
1875 return r;
1876
1877 (void) sd_event_source_set_description(s, path);
1878
1879 if (ret)
1880 *ret = s;
1881 TAKE_PTR(s);
1882
1883 return 0;
1884 }
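/* A minimal usage sketch for sd_event_add_inotify() (illustrative only; the path and the on_inotify
 * handler are hypothetical). The handler signature is sd_event_inotify_handler_t; note that IN_MASK_ADD is
 * refused, as explained above:
 *
 *         static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *                 // ev->mask (and ev->name, for watches on directories) describe the event
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_inotify(event, &source, "/some/path", IN_MODIFY|IN_CLOSE_WRITE, on_inotify, NULL);
 */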
1885
1886 static sd_event_source* event_source_free(sd_event_source *s) {
1887 if (!s)
1888 return NULL;
1889
1890 /* Here's a special hack: when we are called from a
1891 * dispatch handler we won't free the event source
1892 * immediately, but we will detach the fd from the
1893 * epoll. This way it is safe for the caller to unref
1894 * the event source and immediately close the fd, but
1895 * we still retain a valid event source object after
1896 * the callback. */
1897
1898 if (s->dispatching) {
1899 if (s->type == SOURCE_IO)
1900 source_io_unregister(s);
1901
1902 source_disconnect(s);
1903 } else
1904 source_free(s);
1905
1906 return NULL;
1907 }
1908
1909 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1910
1911 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1912 assert_return(s, -EINVAL);
1913 assert_return(!event_pid_changed(s->event), -ECHILD);
1914
1915 return free_and_strdup(&s->description, description);
1916 }
1917
1918 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1919 assert_return(s, -EINVAL);
1920 assert_return(description, -EINVAL);
1921 assert_return(!event_pid_changed(s->event), -ECHILD);
1922
1923 if (!s->description)
1924 return -ENXIO;
1925
1926 *description = s->description;
1927 return 0;
1928 }
1929
1930 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1931 assert_return(s, NULL);
1932
1933 return s->event;
1934 }
1935
1936 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1937 assert_return(s, -EINVAL);
1938 assert_return(s->type != SOURCE_EXIT, -EDOM);
1939 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1940 assert_return(!event_pid_changed(s->event), -ECHILD);
1941
1942 return s->pending;
1943 }
1944
1945 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1946 assert_return(s, -EINVAL);
1947 assert_return(s->type == SOURCE_IO, -EDOM);
1948 assert_return(!event_pid_changed(s->event), -ECHILD);
1949
1950 return s->io.fd;
1951 }
1952
1953 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1954 int r;
1955
1956 assert_return(s, -EINVAL);
1957 assert_return(fd >= 0, -EBADF);
1958 assert_return(s->type == SOURCE_IO, -EDOM);
1959 assert_return(!event_pid_changed(s->event), -ECHILD);
1960
1961 if (s->io.fd == fd)
1962 return 0;
1963
1964 if (s->enabled == SD_EVENT_OFF) {
1965 s->io.fd = fd;
1966 s->io.registered = false;
1967 } else {
1968 int saved_fd;
1969
1970 saved_fd = s->io.fd;
1971 assert(s->io.registered);
1972
1973 s->io.fd = fd;
1974 s->io.registered = false;
1975
1976 r = source_io_register(s, s->enabled, s->io.events);
1977 if (r < 0) {
1978 s->io.fd = saved_fd;
1979 s->io.registered = true;
1980 return r;
1981 }
1982
1983 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1984 }
1985
1986 return 0;
1987 }
1988
1989 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1990 assert_return(s, -EINVAL);
1991 assert_return(s->type == SOURCE_IO, -EDOM);
1992
1993 return s->io.owned;
1994 }
1995
1996 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1997 assert_return(s, -EINVAL);
1998 assert_return(s->type == SOURCE_IO, -EDOM);
1999
2000 s->io.owned = own;
2001 return 0;
2002 }
2003
2004 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2005 assert_return(s, -EINVAL);
2006 assert_return(events, -EINVAL);
2007 assert_return(s->type == SOURCE_IO, -EDOM);
2008 assert_return(!event_pid_changed(s->event), -ECHILD);
2009
2010 *events = s->io.events;
2011 return 0;
2012 }
2013
2014 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2015 int r;
2016
2017 assert_return(s, -EINVAL);
2018 assert_return(s->type == SOURCE_IO, -EDOM);
2019 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2020 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2021 assert_return(!event_pid_changed(s->event), -ECHILD);
2022
2023 /* edge-triggered updates are never skipped, so we can reset edges */
2024 if (s->io.events == events && !(events & EPOLLET))
2025 return 0;
2026
2027 r = source_set_pending(s, false);
2028 if (r < 0)
2029 return r;
2030
2031 if (s->enabled != SD_EVENT_OFF) {
2032 r = source_io_register(s, s->enabled, events);
2033 if (r < 0)
2034 return r;
2035 }
2036
2037 s->io.events = events;
2038
2039 return 0;
2040 }
2041
2042 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2043 assert_return(s, -EINVAL);
2044 assert_return(revents, -EINVAL);
2045 assert_return(s->type == SOURCE_IO, -EDOM);
2046 assert_return(s->pending, -ENODATA);
2047 assert_return(!event_pid_changed(s->event), -ECHILD);
2048
2049 *revents = s->io.revents;
2050 return 0;
2051 }
2052
2053 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2054 assert_return(s, -EINVAL);
2055 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2056 assert_return(!event_pid_changed(s->event), -ECHILD);
2057
2058 return s->signal.sig;
2059 }
2060
2061 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2062 assert_return(s, -EINVAL);
2063 assert_return(!event_pid_changed(s->event), -ECHILD);
2064
2065 *priority = s->priority;
2066 return 0;
2067 }
2068
2069 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2070 bool rm_inotify = false, rm_inode = false;
2071 struct inotify_data *new_inotify_data = NULL;
2072 struct inode_data *new_inode_data = NULL;
2073 int r;
2074
2075 assert_return(s, -EINVAL);
2076 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2077 assert_return(!event_pid_changed(s->event), -ECHILD);
2078
2079 if (s->priority == priority)
2080 return 0;
2081
2082 if (s->type == SOURCE_INOTIFY) {
2083 struct inode_data *old_inode_data;
2084
2085 assert(s->inotify.inode_data);
2086 old_inode_data = s->inotify.inode_data;
2087
2088                 /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2089 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2090 * events we allow priority changes only until the first following iteration. */
2091 if (old_inode_data->fd < 0)
2092 return -EOPNOTSUPP;
2093
2094 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2095 if (r < 0)
2096 return r;
2097 rm_inotify = r > 0;
2098
2099 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2100 if (r < 0)
2101 goto fail;
2102 rm_inode = r > 0;
2103
2104 if (new_inode_data->fd < 0) {
2105 /* Duplicate the fd for the new inode object if we don't have any yet */
2106 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2107 if (new_inode_data->fd < 0) {
2108 r = -errno;
2109 goto fail;
2110 }
2111
2112 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2113 }
2114
2115 /* Move the event source to the new inode data structure */
2116 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2117 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2118 s->inotify.inode_data = new_inode_data;
2119
2120 /* Now create the new watch */
2121 r = inode_data_realize_watch(s->event, new_inode_data);
2122 if (r < 0) {
2123 /* Move it back */
2124 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2125 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2126 s->inotify.inode_data = old_inode_data;
2127 goto fail;
2128 }
2129
2130 s->priority = priority;
2131
2132 event_gc_inode_data(s->event, old_inode_data);
2133
2134 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2135 struct signal_data *old, *d;
2136
2137 /* Move us from the signalfd belonging to the old
2138 * priority to the signalfd of the new priority */
2139
2140 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2141
2142 s->priority = priority;
2143
2144 r = event_make_signal_data(s->event, s->signal.sig, &d);
2145 if (r < 0) {
2146 s->priority = old->priority;
2147 return r;
2148 }
2149
2150 event_unmask_signal_data(s->event, old, s->signal.sig);
2151 } else
2152 s->priority = priority;
2153
2154 if (s->pending)
2155 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2156
2157 if (s->prepare)
2158 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2159
2160 if (s->type == SOURCE_EXIT)
2161 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2162
2163 return 0;
2164
2165 fail:
2166 if (rm_inode)
2167 event_free_inode_data(s->event, new_inode_data);
2168
2169 if (rm_inotify)
2170 event_free_inotify_data(s->event, new_inotify_data);
2171
2172 return r;
2173 }
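
/* Editorial sketch (not part of the original source): how a caller is expected to use
 * sd_event_source_set_priority(). The "example_" name is hypothetical; the source
 * pointer is assumed to come from one of the sd_event_add_*() calls. As noted above,
 * for inotify sources this only works until the first event loop iteration after the
 * source was created, because the inode fd is closed then. */
static int example_deprioritize(sd_event_source *s) {
        /* Sources with a larger priority value are dispatched later; the predefined
         * SD_EVENT_PRIORITY_IMPORTANT/NORMAL/IDLE values are -100/0/100. */
        return sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
}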
2174
2175 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2176 assert_return(s, -EINVAL);
2177 assert_return(!event_pid_changed(s->event), -ECHILD);
2178
2179 if (m)
2180 *m = s->enabled;
2181 return s->enabled != SD_EVENT_OFF;
2182 }
2183
2184 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2185 int r;
2186
2187 assert_return(s, -EINVAL);
2188 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2189 assert_return(!event_pid_changed(s->event), -ECHILD);
2190
2191 /* If we are dead anyway, we are fine with turning off
2192 * sources, but everything else needs to fail. */
2193 if (s->event->state == SD_EVENT_FINISHED)
2194 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2195
2196 if (s->enabled == m)
2197 return 0;
2198
2199 if (m == SD_EVENT_OFF) {
2200
2201 /* Unset the pending flag when this event source is disabled */
2202 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2203 r = source_set_pending(s, false);
2204 if (r < 0)
2205 return r;
2206 }
2207
2208 switch (s->type) {
2209
2210 case SOURCE_IO:
2211 source_io_unregister(s);
2212 s->enabled = m;
2213 break;
2214
2215 case SOURCE_TIME_REALTIME:
2216 case SOURCE_TIME_BOOTTIME:
2217 case SOURCE_TIME_MONOTONIC:
2218 case SOURCE_TIME_REALTIME_ALARM:
2219 case SOURCE_TIME_BOOTTIME_ALARM: {
2220 struct clock_data *d;
2221
2222 s->enabled = m;
2223 d = event_get_clock_data(s->event, s->type);
2224 assert(d);
2225
2226 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2227 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2228 d->needs_rearm = true;
2229 break;
2230 }
2231
2232 case SOURCE_SIGNAL:
2233 s->enabled = m;
2234
2235 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2236 break;
2237
2238 case SOURCE_CHILD:
2239 s->enabled = m;
2240
2241 assert(s->event->n_enabled_child_sources > 0);
2242 s->event->n_enabled_child_sources--;
2243
2244 if (EVENT_SOURCE_WATCH_PIDFD(s))
2245 source_child_pidfd_unregister(s);
2246 else
2247 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2248
2249 break;
2250
2251 case SOURCE_EXIT:
2252 s->enabled = m;
2253 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2254 break;
2255
2256 case SOURCE_DEFER:
2257 case SOURCE_POST:
2258 case SOURCE_INOTIFY:
2259 s->enabled = m;
2260 break;
2261
2262 default:
2263 assert_not_reached("Wut? I shouldn't exist.");
2264 }
2265
2266 } else {
2267
2268 /* Unset the pending flag when this event source is enabled */
2269 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2270 r = source_set_pending(s, false);
2271 if (r < 0)
2272 return r;
2273 }
2274
2275 switch (s->type) {
2276
2277 case SOURCE_IO:
2278 r = source_io_register(s, m, s->io.events);
2279 if (r < 0)
2280 return r;
2281
2282 s->enabled = m;
2283 break;
2284
2285 case SOURCE_TIME_REALTIME:
2286 case SOURCE_TIME_BOOTTIME:
2287 case SOURCE_TIME_MONOTONIC:
2288 case SOURCE_TIME_REALTIME_ALARM:
2289 case SOURCE_TIME_BOOTTIME_ALARM: {
2290 struct clock_data *d;
2291
2292 s->enabled = m;
2293 d = event_get_clock_data(s->event, s->type);
2294 assert(d);
2295
2296 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2297 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2298 d->needs_rearm = true;
2299 break;
2300 }
2301
2302 case SOURCE_SIGNAL:
2303
2304 s->enabled = m;
2305
2306 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2307 if (r < 0) {
2308 s->enabled = SD_EVENT_OFF;
2309 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2310 return r;
2311 }
2312
2313 break;
2314
2315 case SOURCE_CHILD:
2316
2317 if (s->enabled == SD_EVENT_OFF)
2318 s->event->n_enabled_child_sources++;
2319
2320 s->enabled = m;
2321
2322 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2323 /* yes, we have pidfd */
2324
2325 r = source_child_pidfd_register(s, s->enabled);
2326 if (r < 0) {
2327 s->enabled = SD_EVENT_OFF;
2328 s->event->n_enabled_child_sources--;
2329 return r;
2330 }
2331 } else {
2332                                 /* no pidfd, or something other than WEXITED to watch for */
2333
2334 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2335 if (r < 0) {
2336 s->enabled = SD_EVENT_OFF;
2337 s->event->n_enabled_child_sources--;
2338 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2339 return r;
2340 }
2341 }
2342
2343 break;
2344
2345 case SOURCE_EXIT:
2346 s->enabled = m;
2347 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2348 break;
2349
2350 case SOURCE_DEFER:
2351 case SOURCE_POST:
2352 case SOURCE_INOTIFY:
2353 s->enabled = m;
2354 break;
2355
2356 default:
2357 assert_not_reached("Wut? I shouldn't exist.");
2358 }
2359 }
2360
2361 if (s->pending)
2362 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2363
2364 if (s->prepare)
2365 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2366
2367 return 0;
2368 }
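
/* Editorial sketch (not part of the original source): a typical enable call made from
 * application code or from a handler. The "example_" name is hypothetical. */
static int example_rearm_oneshot(sd_event_source *s) {
        /* SD_EVENT_ONESHOT dispatches the source once; source_dispatch() below then
         * flips it back to SD_EVENT_OFF automatically. */
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}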
2369
2370 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2371 assert_return(s, -EINVAL);
2372 assert_return(usec, -EINVAL);
2373 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2374 assert_return(!event_pid_changed(s->event), -ECHILD);
2375
2376 *usec = s->time.next;
2377 return 0;
2378 }
2379
2380 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2381 struct clock_data *d;
2382 int r;
2383
2384 assert_return(s, -EINVAL);
2385 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2386 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2387 assert_return(!event_pid_changed(s->event), -ECHILD);
2388
2389 r = source_set_pending(s, false);
2390 if (r < 0)
2391 return r;
2392
2393 s->time.next = usec;
2394
2395 d = event_get_clock_data(s->event, s->type);
2396 assert(d);
2397
2398 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2399 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2400 d->needs_rearm = true;
2401
2402 return 0;
2403 }
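
/* Editorial sketch (not part of the original source): rearming a timer source relative
 * to the loop's cached timestamp, which is the recommended base for relative deadlines.
 * The "example_" name and the 5s interval are hypothetical; USEC_PER_SEC comes from
 * time-util.h. */
static int example_reschedule_in_5s(sd_event *e, sd_event_source *timer) {
        uint64_t now_usec;
        int r;

        /* Returns the timestamp taken at the start of the current iteration, or the
         * current time if the loop has not run yet (see sd_event_now() below). */
        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        r = sd_event_source_set_time(timer, now_usec + 5 * USEC_PER_SEC);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(timer, SD_EVENT_ONESHOT);
}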
2404
2405 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2406 assert_return(s, -EINVAL);
2407 assert_return(usec, -EINVAL);
2408 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2409 assert_return(!event_pid_changed(s->event), -ECHILD);
2410
2411 *usec = s->time.accuracy;
2412 return 0;
2413 }
2414
2415 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2416 struct clock_data *d;
2417 int r;
2418
2419 assert_return(s, -EINVAL);
2420 assert_return(usec != (uint64_t) -1, -EINVAL);
2421 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2422 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2423 assert_return(!event_pid_changed(s->event), -ECHILD);
2424
2425 r = source_set_pending(s, false);
2426 if (r < 0)
2427 return r;
2428
2429 if (usec == 0)
2430 usec = DEFAULT_ACCURACY_USEC;
2431
2432 s->time.accuracy = usec;
2433
2434 d = event_get_clock_data(s->event, s->type);
2435 assert(d);
2436
2437 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2438 d->needs_rearm = true;
2439
2440 return 0;
2441 }
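
/* Editorial sketch (not part of the original source): granting a coarse accuracy so
 * that the wake-up can be coalesced with other timers by sleep_between() below. The
 * "example_" name and the one-second slack are hypothetical. */
static int example_allow_coalescing(sd_event_source *timer) {
        /* Permit the dispatch to happen up to one full second after the deadline. */
        return sd_event_source_set_time_accuracy(timer, USEC_PER_SEC);
}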
2442
2443 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2444 assert_return(s, -EINVAL);
2445 assert_return(clock, -EINVAL);
2446 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2447 assert_return(!event_pid_changed(s->event), -ECHILD);
2448
2449 *clock = event_source_type_to_clock(s->type);
2450 return 0;
2451 }
2452
2453 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2454 assert_return(s, -EINVAL);
2455 assert_return(pid, -EINVAL);
2456 assert_return(s->type == SOURCE_CHILD, -EDOM);
2457 assert_return(!event_pid_changed(s->event), -ECHILD);
2458
2459 *pid = s->child.pid;
2460 return 0;
2461 }
2462
2463 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2464 assert_return(s, -EINVAL);
2465 assert_return(s->type == SOURCE_CHILD, -EDOM);
2466 assert_return(!event_pid_changed(s->event), -ECHILD);
2467
2468 if (s->child.pidfd < 0)
2469 return -EOPNOTSUPP;
2470
2471 return s->child.pidfd;
2472 }
2473
2474 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2475 assert_return(s, -EINVAL);
2476 assert_return(s->type == SOURCE_CHILD, -EDOM);
2477 assert_return(!event_pid_changed(s->event), -ECHILD);
2478 assert_return(SIGNAL_VALID(sig), -EINVAL);
2479
2480         /* If we have already seen an indication that the process exited, refuse sending a signal early. This
2481          * way we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2482          * available. */
2483 if (s->child.exited)
2484 return -ESRCH;
2485
2486 if (s->child.pidfd >= 0) {
2487 siginfo_t copy;
2488
2489                 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, hence let's copy the
2490                  * structure here. */
2491 if (si)
2492 copy = *si;
2493
2494 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2495 /* Let's propagate the error only if the system call is not implemented or prohibited */
2496 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2497 return -errno;
2498 } else
2499 return 0;
2500 }
2501
2502 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2503 * this here. */
2504 if (flags != 0)
2505 return -EOPNOTSUPP;
2506
2507 if (si) {
2508 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2509 siginfo_t copy = *si;
2510
2511 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2512 return -errno;
2513 } else if (kill(s->child.pid, sig) < 0)
2514 return -errno;
2515
2516 return 0;
2517 }
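
/* Editorial sketch (not part of the original source): politely terminating a child
 * that is watched by a SOURCE_CHILD source. The "example_" name is hypothetical. */
static int example_terminate_child(sd_event_source *child) {
        int r;

        /* Prefers pidfd_send_signal() when a pidfd is available, which avoids killing
         * an unrelated process after PID reuse. */
        r = sd_event_source_send_child_signal(child, SIGTERM, NULL, 0);
        if (r == -ESRCH) /* Already exited, nothing to do */
                return 0;

        return r;
}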
2518
2519 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2520 assert_return(s, -EINVAL);
2521 assert_return(s->type == SOURCE_CHILD, -EDOM);
2522
2523 if (s->child.pidfd < 0)
2524 return -EOPNOTSUPP;
2525
2526 return s->child.pidfd_owned;
2527 }
2528
2529 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2530 assert_return(s, -EINVAL);
2531 assert_return(s->type == SOURCE_CHILD, -EDOM);
2532
2533 if (s->child.pidfd < 0)
2534 return -EOPNOTSUPP;
2535
2536 s->child.pidfd_owned = own;
2537 return 0;
2538 }
2539
2540 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2541 assert_return(s, -EINVAL);
2542 assert_return(s->type == SOURCE_CHILD, -EDOM);
2543
2544 return s->child.process_owned;
2545 }
2546
2547 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2548 assert_return(s, -EINVAL);
2549 assert_return(s->type == SOURCE_CHILD, -EDOM);
2550
2551 s->child.process_owned = own;
2552 return 0;
2553 }
2554
2555 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2556 assert_return(s, -EINVAL);
2557 assert_return(mask, -EINVAL);
2558 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2559 assert_return(!event_pid_changed(s->event), -ECHILD);
2560
2561 *mask = s->inotify.mask;
2562 return 0;
2563 }
2564
2565 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2566 int r;
2567
2568 assert_return(s, -EINVAL);
2569 assert_return(s->type != SOURCE_EXIT, -EDOM);
2570 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2571 assert_return(!event_pid_changed(s->event), -ECHILD);
2572
2573 if (s->prepare == callback)
2574 return 0;
2575
2576 if (callback && s->prepare) {
2577 s->prepare = callback;
2578 return 0;
2579 }
2580
2581 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2582 if (r < 0)
2583 return r;
2584
2585 s->prepare = callback;
2586
2587 if (callback) {
2588 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2589 if (r < 0)
2590 return r;
2591 } else
2592 prioq_remove(s->event->prepare, s, &s->prepare_index);
2593
2594 return 0;
2595 }
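
/* Editorial sketch (not part of the original source): a prepare callback runs from
 * event_prepare() right before the loop goes to sleep, which makes it a good place to
 * flush application buffers or recompute the watched event mask. Names are hypothetical. */
static int example_prepare_cb(sd_event_source *s, void *userdata) {
        /* Recompute the interesting events right before polling; here we simply (re)set
         * EPOLLIN on the same I/O source the prepare callback is attached to. */
        return sd_event_source_set_io_events(s, EPOLLIN);
}

static int example_install_prepare(sd_event_source *io_source) {
        return sd_event_source_set_prepare(io_source, example_prepare_cb);
}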
2596
2597 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2598 assert_return(s, NULL);
2599
2600 return s->userdata;
2601 }
2602
2603 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2604 void *ret;
2605
2606 assert_return(s, NULL);
2607
2608 ret = s->userdata;
2609 s->userdata = userdata;
2610
2611 return ret;
2612 }
2613
2614 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2615 usec_t c;
2616 assert(e);
2617 assert(a <= b);
2618
2619 if (a <= 0)
2620 return 0;
2621 if (a >= USEC_INFINITY)
2622 return USEC_INFINITY;
2623
2624 if (b <= a + 1)
2625 return a;
2626
2627 initialize_perturb(e);
2628
2629 /*
2630 Find a good time to wake up again between times a and b. We
2631 have two goals here:
2632
2633 a) We want to wake up as seldom as possible, hence prefer
2634 later times over earlier times.
2635
2636 b) But if we have to wake up, then let's make sure to
2637 dispatch as much as possible on the entire system.
2638
2639 We implement this by waking up everywhere at the same time
2640 within any given minute if we can, synchronised via the
2641 perturbation value determined from the boot ID. If we can't,
2642                 then we try to find the same spot within every 10s, then within
2643                 every 1s and finally within every 250ms step. Otherwise, we pick
2644                 the last possible time to wake up.
2645 */
2646
2647 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2648 if (c >= b) {
2649 if (_unlikely_(c < USEC_PER_MINUTE))
2650 return b;
2651
2652 c -= USEC_PER_MINUTE;
2653 }
2654
2655 if (c >= a)
2656 return c;
2657
2658 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2659 if (c >= b) {
2660 if (_unlikely_(c < USEC_PER_SEC*10))
2661 return b;
2662
2663 c -= USEC_PER_SEC*10;
2664 }
2665
2666 if (c >= a)
2667 return c;
2668
2669 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2670 if (c >= b) {
2671 if (_unlikely_(c < USEC_PER_SEC))
2672 return b;
2673
2674 c -= USEC_PER_SEC;
2675 }
2676
2677 if (c >= a)
2678 return c;
2679
2680 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2681 if (c >= b) {
2682 if (_unlikely_(c < USEC_PER_MSEC*250))
2683 return b;
2684
2685 c -= USEC_PER_MSEC*250;
2686 }
2687
2688 if (c >= a)
2689 return c;
2690
2691 return b;
2692 }
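
/* Editorial note (not part of the original source): a worked example of the coalescing
 * above. Assume perturb = 7.5s, a = xx:xx:03 and b = xx:xx:20 within the same minute.
 * The minute-granularity candidate is c = (b rounded down to the minute) + perturb =
 * xx:xx:07.5; c < b, so nothing is subtracted, and c >= a, so xx:xx:07.5 is returned and
 * every loop sharing this boot ID wakes up at the same offset into the minute. Had a
 * been xx:xx:10, c would be too early and the 10s-granularity candidate would be tried:
 * (b rounded down to 10s) + (perturb mod 10s) = xx:xx:27.5, which exceeds b and is
 * reduced by 10s to xx:xx:17.5, which is then returned. */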
2693
2694 static int event_arm_timer(
2695 sd_event *e,
2696 struct clock_data *d) {
2697
2698 struct itimerspec its = {};
2699 sd_event_source *a, *b;
2700 usec_t t;
2701 int r;
2702
2703 assert(e);
2704 assert(d);
2705
2706 if (!d->needs_rearm)
2707 return 0;
2708 else
2709 d->needs_rearm = false;
2710
2711 a = prioq_peek(d->earliest);
2712 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2713
2714 if (d->fd < 0)
2715 return 0;
2716
2717 if (d->next == USEC_INFINITY)
2718 return 0;
2719
2720 /* disarm */
2721 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2722 if (r < 0)
2723 return r;
2724
2725 d->next = USEC_INFINITY;
2726 return 0;
2727 }
2728
2729 b = prioq_peek(d->latest);
2730 assert_se(b && b->enabled != SD_EVENT_OFF);
2731
2732 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2733 if (d->next == t)
2734 return 0;
2735
2736 assert_se(d->fd >= 0);
2737
2738 if (t == 0) {
2739                 /* We don't want to disarm here, we just point the timer at some time looooong ago so it fires right away. */
2740 its.it_value.tv_sec = 0;
2741 its.it_value.tv_nsec = 1;
2742 } else
2743 timespec_store(&its.it_value, t);
2744
2745 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2746 if (r < 0)
2747 return -errno;
2748
2749 d->next = t;
2750 return 0;
2751 }
2752
2753 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2754 assert(e);
2755 assert(s);
2756 assert(s->type == SOURCE_IO);
2757
2758 /* If the event source was already pending, we just OR in the
2759 * new revents, otherwise we reset the value. The ORing is
2760 * necessary to handle EPOLLONESHOT events properly where
2761 * readability might happen independently of writability, and
2762 * we need to keep track of both */
2763
2764 if (s->pending)
2765 s->io.revents |= revents;
2766 else
2767 s->io.revents = revents;
2768
2769 return source_set_pending(s, true);
2770 }
2771
2772 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2773 uint64_t x;
2774 ssize_t ss;
2775
2776 assert(e);
2777 assert(fd >= 0);
2778
2779 assert_return(events == EPOLLIN, -EIO);
2780
2781 ss = read(fd, &x, sizeof(x));
2782 if (ss < 0) {
2783 if (IN_SET(errno, EAGAIN, EINTR))
2784 return 0;
2785
2786 return -errno;
2787 }
2788
2789 if (_unlikely_(ss != sizeof(x)))
2790 return -EIO;
2791
2792 if (next)
2793 *next = USEC_INFINITY;
2794
2795 return 0;
2796 }
2797
2798 static int process_timer(
2799 sd_event *e,
2800 usec_t n,
2801 struct clock_data *d) {
2802
2803 sd_event_source *s;
2804 int r;
2805
2806 assert(e);
2807 assert(d);
2808
2809 for (;;) {
2810 s = prioq_peek(d->earliest);
2811 if (!s ||
2812 s->time.next > n ||
2813 s->enabled == SD_EVENT_OFF ||
2814 s->pending)
2815 break;
2816
2817 r = source_set_pending(s, true);
2818 if (r < 0)
2819 return r;
2820
2821 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2822 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2823 d->needs_rearm = true;
2824 }
2825
2826 return 0;
2827 }
2828
2829 static int process_child(sd_event *e) {
2830 sd_event_source *s;
2831 Iterator i;
2832 int r;
2833
2834 assert(e);
2835
2836 e->need_process_child = false;
2837
2838 /*
2839 So, this is ugly. We iteratively invoke waitid() with P_PID
2840 + WNOHANG for each PID we wait for, instead of using
2841 P_ALL. This is because we only want to get child
2842 information of very specific child processes, and not all
2843                 of them. We might not have processed the SIGCHLD event of a
2844                 previous invocation and we don't want to maintain an
2845                 unbounded *per-child* event queue, hence we really don't
2846 want anything flushed out of the kernel's queue that we
2847 don't care about. Since this is O(n) this means that if you
2848 have a lot of processes you probably want to handle SIGCHLD
2849 yourself.
2850
2851                 We do not reap the children here (by using WNOWAIT); that
2852                 is only done after the event source is dispatched, so that
2853                 the callback still sees the process as a zombie.
2854 */
2855
2856 HASHMAP_FOREACH(s, e->child_sources, i) {
2857 assert(s->type == SOURCE_CHILD);
2858
2859 if (s->pending)
2860 continue;
2861
2862 if (s->enabled == SD_EVENT_OFF)
2863 continue;
2864
2865 if (s->child.exited)
2866 continue;
2867
2868                 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? Then don't waitid() for it here. */
2869 continue;
2870
2871 zero(s->child.siginfo);
2872 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2873 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2874 if (r < 0)
2875 return -errno;
2876
2877 if (s->child.siginfo.si_pid != 0) {
2878 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2879
2880 if (zombie)
2881 s->child.exited = true;
2882
2883 if (!zombie && (s->child.options & WEXITED)) {
2884 /* If the child isn't dead then let's
2885 * immediately remove the state change
2886 * from the queue, since there's no
2887 * benefit in leaving it queued */
2888
2889 assert(s->child.options & (WSTOPPED|WCONTINUED));
2890 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2891 }
2892
2893 r = source_set_pending(s, true);
2894 if (r < 0)
2895 return r;
2896 }
2897 }
2898
2899 return 0;
2900 }
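
/* Editorial sketch (not part of the original source): the comment above suggests
 * handling SIGCHLD yourself when watching very many processes. This is roughly what
 * that could look like; it must not be combined with per-PID SOURCE_CHILD sources,
 * since P_ALL reaping would consume the exit statuses they want to peek at. The
 * "example_" names are hypothetical. */
static int example_sigchld_cb(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        for (;;) {
                siginfo_t info = {};

                /* Reap everything that is ready in one go, instead of one waitid() per PID */
                if (waitid(P_ALL, 0, &info, WEXITED|WNOHANG) < 0)
                        return errno == ECHILD ? 0 : -errno;
                if (info.si_pid == 0) /* nothing left */
                        return 0;

                /* ... hand info.si_pid / info.si_status over to the application ... */
        }
}

static int example_watch_all_children(sd_event *e) {
        sigset_t mask;

        /* sd_event_add_signal() requires the signal to be blocked beforehand */
        sigemptyset(&mask);
        sigaddset(&mask, SIGCHLD);
        if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
                return -errno;

        return sd_event_add_signal(e, NULL, SIGCHLD, example_sigchld_cb, NULL);
}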
2901
2902 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2903 assert(e);
2904 assert(s);
2905 assert(s->type == SOURCE_CHILD);
2906
2907 if (s->pending)
2908 return 0;
2909
2910 if (s->enabled == SD_EVENT_OFF)
2911 return 0;
2912
2913 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2914 return 0;
2915
2916 zero(s->child.siginfo);
2917 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2918 return -errno;
2919
2920 if (s->child.siginfo.si_pid == 0)
2921 return 0;
2922
2923 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2924 s->child.exited = true;
2925
2926 return source_set_pending(s, true);
2927 }
2928
2929 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2930 bool read_one = false;
2931 int r;
2932
2933 assert(e);
2934 assert(d);
2935 assert_return(events == EPOLLIN, -EIO);
2936
2937         /* If there's a signal queued on this priority and SIGCHLD is
2938            on this priority too, then make sure to recheck the
2939            children we watch. This is because we only ever dequeue
2940            the first signal per priority: if we dequeue one and
2941            SIGCHLD is enqueued later we wouldn't notice, but we
2942            might have higher-priority children we care about, hence
2943            we need to check for them explicitly. */
2944
2945 if (sigismember(&d->sigset, SIGCHLD))
2946 e->need_process_child = true;
2947
2948 /* If there's already an event source pending for this
2949 * priority we don't read another */
2950 if (d->current)
2951 return 0;
2952
2953 for (;;) {
2954 struct signalfd_siginfo si;
2955 ssize_t n;
2956 sd_event_source *s = NULL;
2957
2958 n = read(d->fd, &si, sizeof(si));
2959 if (n < 0) {
2960 if (IN_SET(errno, EAGAIN, EINTR))
2961 return read_one;
2962
2963 return -errno;
2964 }
2965
2966 if (_unlikely_(n != sizeof(si)))
2967 return -EIO;
2968
2969 assert(SIGNAL_VALID(si.ssi_signo));
2970
2971 read_one = true;
2972
2973 if (e->signal_sources)
2974 s = e->signal_sources[si.ssi_signo];
2975 if (!s)
2976 continue;
2977 if (s->pending)
2978 continue;
2979
2980 s->signal.siginfo = si;
2981 d->current = s;
2982
2983 r = source_set_pending(s, true);
2984 if (r < 0)
2985 return r;
2986
2987 return 1;
2988 }
2989 }
2990
2991 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2992 ssize_t n;
2993
2994 assert(e);
2995 assert(d);
2996
2997 assert_return(revents == EPOLLIN, -EIO);
2998
2999 /* If there's already an event source pending for this priority, don't read another */
3000 if (d->n_pending > 0)
3001 return 0;
3002
3003 /* Is the read buffer non-empty? If so, let's not read more */
3004 if (d->buffer_filled > 0)
3005 return 0;
3006
3007 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3008 if (n < 0) {
3009 if (IN_SET(errno, EAGAIN, EINTR))
3010 return 0;
3011
3012 return -errno;
3013 }
3014
3015 assert(n > 0);
3016 d->buffer_filled = (size_t) n;
3017 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3018
3019 return 1;
3020 }
3021
3022 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3023 assert(e);
3024 assert(d);
3025 assert(sz <= d->buffer_filled);
3026
3027 if (sz == 0)
3028 return;
3029
3030         /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3031 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3032 d->buffer_filled -= sz;
3033
3034 if (d->buffer_filled == 0)
3035 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3036 }
3037
3038 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3039 int r;
3040
3041 assert(e);
3042 assert(d);
3043
3044 /* If there's already an event source pending for this priority, don't read another */
3045 if (d->n_pending > 0)
3046 return 0;
3047
3048 while (d->buffer_filled > 0) {
3049 size_t sz;
3050
3051 /* Let's validate that the event structures are complete */
3052 if (d->buffer_filled < offsetof(struct inotify_event, name))
3053 return -EIO;
3054
3055 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3056 if (d->buffer_filled < sz)
3057 return -EIO;
3058
3059 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3060 struct inode_data *inode_data;
3061 Iterator i;
3062
3063 /* The queue overran, let's pass this event to all event sources connected to this inotify
3064 * object */
3065
3066 HASHMAP_FOREACH(inode_data, d->inodes, i) {
3067 sd_event_source *s;
3068
3069 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3070
3071 if (s->enabled == SD_EVENT_OFF)
3072 continue;
3073
3074 r = source_set_pending(s, true);
3075 if (r < 0)
3076 return r;
3077 }
3078 }
3079 } else {
3080 struct inode_data *inode_data;
3081 sd_event_source *s;
3082
3083 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3084 * our watch descriptor table. */
3085 if (d->buffer.ev.mask & IN_IGNORED) {
3086
3087 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3088 if (!inode_data) {
3089 event_inotify_data_drop(e, d, sz);
3090 continue;
3091 }
3092
3093 /* The watch descriptor was removed by the kernel, let's drop it here too */
3094 inode_data->wd = -1;
3095 } else {
3096 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3097 if (!inode_data) {
3098 event_inotify_data_drop(e, d, sz);
3099 continue;
3100 }
3101 }
3102
3103 /* Trigger all event sources that are interested in these events. Also trigger all event
3104 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3105 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3106
3107 if (s->enabled == SD_EVENT_OFF)
3108 continue;
3109
3110 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3111 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3112 continue;
3113
3114 r = source_set_pending(s, true);
3115 if (r < 0)
3116 return r;
3117 }
3118 }
3119
3120 /* Something pending now? If so, let's finish, otherwise let's read more. */
3121 if (d->n_pending > 0)
3122 return 1;
3123 }
3124
3125 return 0;
3126 }
3127
3128 static int process_inotify(sd_event *e) {
3129 struct inotify_data *d;
3130 int r, done = 0;
3131
3132 assert(e);
3133
3134 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3135 r = event_inotify_data_process(e, d);
3136 if (r < 0)
3137 return r;
3138 if (r > 0)
3139                         done++;
3140 }
3141
3142 return done;
3143 }
3144
3145 static int source_dispatch(sd_event_source *s) {
3146 EventSourceType saved_type;
3147 int r = 0;
3148
3149 assert(s);
3150 assert(s->pending || s->type == SOURCE_EXIT);
3151
3152         /* Save the event source type here, so that we still know it after the event callback, which might
3153          * invalidate the event. */
3154 saved_type = s->type;
3155
3156 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3157 r = source_set_pending(s, false);
3158 if (r < 0)
3159 return r;
3160 }
3161
3162 if (s->type != SOURCE_POST) {
3163 sd_event_source *z;
3164 Iterator i;
3165
3166 /* If we execute a non-post source, let's mark all
3167 * post sources as pending */
3168
3169 SET_FOREACH(z, s->event->post_sources, i) {
3170 if (z->enabled == SD_EVENT_OFF)
3171 continue;
3172
3173 r = source_set_pending(z, true);
3174 if (r < 0)
3175 return r;
3176 }
3177 }
3178
3179 if (s->enabled == SD_EVENT_ONESHOT) {
3180 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3181 if (r < 0)
3182 return r;
3183 }
3184
3185 s->dispatching = true;
3186
3187 switch (s->type) {
3188
3189 case SOURCE_IO:
3190 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3191 break;
3192
3193 case SOURCE_TIME_REALTIME:
3194 case SOURCE_TIME_BOOTTIME:
3195 case SOURCE_TIME_MONOTONIC:
3196 case SOURCE_TIME_REALTIME_ALARM:
3197 case SOURCE_TIME_BOOTTIME_ALARM:
3198 r = s->time.callback(s, s->time.next, s->userdata);
3199 break;
3200
3201 case SOURCE_SIGNAL:
3202 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3203 break;
3204
3205 case SOURCE_CHILD: {
3206 bool zombie;
3207
3208 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3209
3210 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3211
3212 /* Now, reap the PID for good. */
3213 if (zombie) {
3214 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3215 s->child.waited = true;
3216 }
3217
3218 break;
3219 }
3220
3221 case SOURCE_DEFER:
3222 r = s->defer.callback(s, s->userdata);
3223 break;
3224
3225 case SOURCE_POST:
3226 r = s->post.callback(s, s->userdata);
3227 break;
3228
3229 case SOURCE_EXIT:
3230 r = s->exit.callback(s, s->userdata);
3231 break;
3232
3233 case SOURCE_INOTIFY: {
3234 struct sd_event *e = s->event;
3235 struct inotify_data *d;
3236 size_t sz;
3237
3238 assert(s->inotify.inode_data);
3239 assert_se(d = s->inotify.inode_data->inotify_data);
3240
3241 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3242 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3243 assert(d->buffer_filled >= sz);
3244
3245 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3246
3247 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3248 * buffer. */
3249 if (d->n_pending == 0)
3250 event_inotify_data_drop(e, d, sz);
3251
3252 break;
3253 }
3254
3255 case SOURCE_WATCHDOG:
3256 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3257 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3258 assert_not_reached("Wut? I shouldn't exist.");
3259 }
3260
3261 s->dispatching = false;
3262
3263 if (r < 0)
3264 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3265 strna(s->description), event_source_type_to_string(saved_type));
3266
3267 if (s->n_ref == 0)
3268 source_free(s);
3269 else if (r < 0)
3270 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3271
3272 return 1;
3273 }
3274
3275 static int event_prepare(sd_event *e) {
3276 int r;
3277
3278 assert(e);
3279
3280 for (;;) {
3281 sd_event_source *s;
3282
3283 s = prioq_peek(e->prepare);
3284 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3285 break;
3286
3287 s->prepare_iteration = e->iteration;
3288 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3289 if (r < 0)
3290 return r;
3291
3292 assert(s->prepare);
3293
3294 s->dispatching = true;
3295 r = s->prepare(s, s->userdata);
3296 s->dispatching = false;
3297
3298 if (r < 0)
3299 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3300 strna(s->description), event_source_type_to_string(s->type));
3301
3302 if (s->n_ref == 0)
3303 source_free(s);
3304 else if (r < 0)
3305 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3306 }
3307
3308 return 0;
3309 }
3310
3311 static int dispatch_exit(sd_event *e) {
3312 sd_event_source *p;
3313 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3314 int r;
3315
3316 assert(e);
3317
3318 p = prioq_peek(e->exit);
3319 if (!p || p->enabled == SD_EVENT_OFF) {
3320 e->state = SD_EVENT_FINISHED;
3321 return 0;
3322 }
3323
3324 ref = sd_event_ref(e);
3325 e->iteration++;
3326 e->state = SD_EVENT_EXITING;
3327 r = source_dispatch(p);
3328 e->state = SD_EVENT_INITIAL;
3329 return r;
3330 }
3331
3332 static sd_event_source* event_next_pending(sd_event *e) {
3333 sd_event_source *p;
3334
3335 assert(e);
3336
3337 p = prioq_peek(e->pending);
3338 if (!p)
3339 return NULL;
3340
3341 if (p->enabled == SD_EVENT_OFF)
3342 return NULL;
3343
3344 return p;
3345 }
3346
3347 static int arm_watchdog(sd_event *e) {
3348 struct itimerspec its = {};
3349 usec_t t;
3350 int r;
3351
3352 assert(e);
3353 assert(e->watchdog_fd >= 0);
3354
3355 t = sleep_between(e,
3356 e->watchdog_last + (e->watchdog_period / 2),
3357 e->watchdog_last + (e->watchdog_period * 3 / 4));
3358
3359 timespec_store(&its.it_value, t);
3360
3361 /* Make sure we never set the watchdog to 0, which tells the
3362 * kernel to disable it. */
3363 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3364 its.it_value.tv_nsec = 1;
3365
3366 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3367 if (r < 0)
3368 return -errno;
3369
3370 return 0;
3371 }
3372
3373 static int process_watchdog(sd_event *e) {
3374 assert(e);
3375
3376 if (!e->watchdog)
3377 return 0;
3378
3379 /* Don't notify watchdog too often */
3380 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3381 return 0;
3382
3383 sd_notify(false, "WATCHDOG=1");
3384 e->watchdog_last = e->timestamp.monotonic;
3385
3386 return arm_watchdog(e);
3387 }
3388
3389 static void event_close_inode_data_fds(sd_event *e) {
3390 struct inode_data *d;
3391
3392 assert(e);
3393
3394         /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3395          * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3396          * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
3397          * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
3398          * a compromise. */
3399
3400 while ((d = e->inode_data_to_close)) {
3401 assert(d->fd >= 0);
3402 d->fd = safe_close(d->fd);
3403
3404 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3405 }
3406 }
3407
3408 _public_ int sd_event_prepare(sd_event *e) {
3409 int r;
3410
3411 assert_return(e, -EINVAL);
3412 assert_return(e = event_resolve(e), -ENOPKG);
3413 assert_return(!event_pid_changed(e), -ECHILD);
3414 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3415 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3416
3417         /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3418          * this check here once, since gettid() is typically not cached, and we thus want to minimize
3419          * syscalls. */
3420 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3421
3422 if (e->exit_requested)
3423 goto pending;
3424
3425 e->iteration++;
3426
3427 e->state = SD_EVENT_PREPARING;
3428 r = event_prepare(e);
3429 e->state = SD_EVENT_INITIAL;
3430 if (r < 0)
3431 return r;
3432
3433 r = event_arm_timer(e, &e->realtime);
3434 if (r < 0)
3435 return r;
3436
3437 r = event_arm_timer(e, &e->boottime);
3438 if (r < 0)
3439 return r;
3440
3441 r = event_arm_timer(e, &e->monotonic);
3442 if (r < 0)
3443 return r;
3444
3445 r = event_arm_timer(e, &e->realtime_alarm);
3446 if (r < 0)
3447 return r;
3448
3449 r = event_arm_timer(e, &e->boottime_alarm);
3450 if (r < 0)
3451 return r;
3452
3453 event_close_inode_data_fds(e);
3454
3455 if (event_next_pending(e) || e->need_process_child)
3456 goto pending;
3457
3458 e->state = SD_EVENT_ARMED;
3459
3460 return 0;
3461
3462 pending:
3463 e->state = SD_EVENT_ARMED;
3464 r = sd_event_wait(e, 0);
3465 if (r == 0)
3466 e->state = SD_EVENT_ARMED;
3467
3468 return r;
3469 }
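
/* Editorial sketch (not part of the original source): the split-phase equivalent of one
 * sd_event_run() iteration, i.e. the state transitions INITIAL -> (prepare) -> ARMED ->
 * (wait) -> PENDING -> (dispatch) -> INITIAL. The "example_" name is hypothetical. */
static int example_manual_iteration(sd_event *e) {
        int r;

        r = sd_event_prepare(e); /* > 0 if something is already pending */
        if (r == 0)
                r = sd_event_wait(e, (uint64_t) -1); /* block until something happens */
        if (r > 0)
                r = sd_event_dispatch(e); /* run exactly one handler */

        return r;
}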
3470
3471 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3472 size_t event_queue_max;
3473 int r, m, i;
3474
3475 assert_return(e, -EINVAL);
3476 assert_return(e = event_resolve(e), -ENOPKG);
3477 assert_return(!event_pid_changed(e), -ECHILD);
3478 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3479 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3480
3481 if (e->exit_requested) {
3482 e->state = SD_EVENT_PENDING;
3483 return 1;
3484 }
3485
3486 event_queue_max = MAX(e->n_sources, 1u);
3487 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3488 return -ENOMEM;
3489
3490 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3491 if (e->inotify_data_buffered)
3492 timeout = 0;
3493
3494 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
3495 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3496 if (m < 0) {
3497 if (errno == EINTR) {
3498 e->state = SD_EVENT_PENDING;
3499 return 1;
3500 }
3501
3502 r = -errno;
3503 goto finish;
3504 }
3505
3506 triple_timestamp_get(&e->timestamp);
3507
3508 for (i = 0; i < m; i++) {
3509
3510 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3511 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3512 else {
3513 WakeupType *t = e->event_queue[i].data.ptr;
3514
3515 switch (*t) {
3516
3517 case WAKEUP_EVENT_SOURCE: {
3518 sd_event_source *s = e->event_queue[i].data.ptr;
3519
3520 assert(s);
3521
3522 switch (s->type) {
3523
3524 case SOURCE_IO:
3525 r = process_io(e, s, e->event_queue[i].events);
3526 break;
3527
3528 case SOURCE_CHILD:
3529 r = process_pidfd(e, s, e->event_queue[i].events);
3530 break;
3531
3532 default:
3533 assert_not_reached("Unexpected event source type");
3534 }
3535
3536 break;
3537 }
3538
3539 case WAKEUP_CLOCK_DATA: {
3540 struct clock_data *d = e->event_queue[i].data.ptr;
3541
3542 assert(d);
3543
3544 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3545 break;
3546 }
3547
3548 case WAKEUP_SIGNAL_DATA:
3549 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3550 break;
3551
3552 case WAKEUP_INOTIFY_DATA:
3553 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3554 break;
3555
3556 default:
3557 assert_not_reached("Invalid wake-up pointer");
3558 }
3559 }
3560 if (r < 0)
3561 goto finish;
3562 }
3563
3564 r = process_watchdog(e);
3565 if (r < 0)
3566 goto finish;
3567
3568 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3569 if (r < 0)
3570 goto finish;
3571
3572 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3573 if (r < 0)
3574 goto finish;
3575
3576 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3577 if (r < 0)
3578 goto finish;
3579
3580 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3581 if (r < 0)
3582 goto finish;
3583
3584 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3585 if (r < 0)
3586 goto finish;
3587
3588 if (e->need_process_child) {
3589 r = process_child(e);
3590 if (r < 0)
3591 goto finish;
3592 }
3593
3594 r = process_inotify(e);
3595 if (r < 0)
3596 goto finish;
3597
3598 if (event_next_pending(e)) {
3599 e->state = SD_EVENT_PENDING;
3600
3601 return 1;
3602 }
3603
3604 r = 0;
3605
3606 finish:
3607 e->state = SD_EVENT_INITIAL;
3608
3609 return r;
3610 }
3611
3612 _public_ int sd_event_dispatch(sd_event *e) {
3613 sd_event_source *p;
3614 int r;
3615
3616 assert_return(e, -EINVAL);
3617 assert_return(e = event_resolve(e), -ENOPKG);
3618 assert_return(!event_pid_changed(e), -ECHILD);
3619 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3620 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3621
3622 if (e->exit_requested)
3623 return dispatch_exit(e);
3624
3625 p = event_next_pending(e);
3626 if (p) {
3627 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3628
3629 ref = sd_event_ref(e);
3630 e->state = SD_EVENT_RUNNING;
3631 r = source_dispatch(p);
3632 e->state = SD_EVENT_INITIAL;
3633 return r;
3634 }
3635
3636 e->state = SD_EVENT_INITIAL;
3637
3638 return 1;
3639 }
3640
3641 static void event_log_delays(sd_event *e) {
3642 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3643 size_t l, i;
3644
3645 p = b;
3646 l = sizeof(b);
3647 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3648 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3649 e->delays[i] = 0;
3650 }
3651 log_debug("Event loop iterations: %s", b);
3652 }
3653
3654 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3655 int r;
3656
3657 assert_return(e, -EINVAL);
3658 assert_return(e = event_resolve(e), -ENOPKG);
3659 assert_return(!event_pid_changed(e), -ECHILD);
3660 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3661 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3662
3663 if (e->profile_delays && e->last_run) {
3664 usec_t this_run;
3665 unsigned l;
3666
3667 this_run = now(CLOCK_MONOTONIC);
3668
3669 l = u64log2(this_run - e->last_run);
3670 assert(l < sizeof(e->delays));
3671 e->delays[l]++;
3672
3673 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3674 event_log_delays(e);
3675 e->last_log = this_run;
3676 }
3677 }
3678
3679 r = sd_event_prepare(e);
3680 if (r == 0)
3681 /* There was nothing? Then wait... */
3682 r = sd_event_wait(e, timeout);
3683
3684 if (e->profile_delays)
3685 e->last_run = now(CLOCK_MONOTONIC);
3686
3687 if (r > 0) {
3688                 /* There's something now, so let's dispatch it */
3689 r = sd_event_dispatch(e);
3690 if (r < 0)
3691 return r;
3692
3693 return 1;
3694 }
3695
3696 return r;
3697 }
3698
3699 _public_ int sd_event_loop(sd_event *e) {
3700 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3701 int r;
3702
3703 assert_return(e, -EINVAL);
3704 assert_return(e = event_resolve(e), -ENOPKG);
3705 assert_return(!event_pid_changed(e), -ECHILD);
3706 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3707
3708 ref = sd_event_ref(e);
3709
3710 while (e->state != SD_EVENT_FINISHED) {
3711 r = sd_event_run(e, (uint64_t) -1);
3712 if (r < 0)
3713 return r;
3714 }
3715
3716 return e->exit_code;
3717 }
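
/* Editorial sketch (not part of the original source): a minimal program built around the
 * default event loop. A standalone program would #include <systemd/sd-event.h>; the
 * handler, exit code and "example_" names are hypothetical. */
static int example_on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Ask the loop to leave sd_event_loop() with exit code 0 */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int example_main_loop(void) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        sigset_t mask;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* Signals handled via the loop must be blocked first */
        sigemptyset(&mask);
        sigaddset(&mask, SIGTERM);
        if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
                return -errno;

        r = sd_event_add_signal(e, NULL, SIGTERM, example_on_sigterm, NULL);
        if (r < 0)
                return r;

        /* Runs prepare/wait/dispatch iterations until sd_event_exit() is called */
        return sd_event_loop(e);
}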
3718
3719 _public_ int sd_event_get_fd(sd_event *e) {
3720
3721 assert_return(e, -EINVAL);
3722 assert_return(e = event_resolve(e), -ENOPKG);
3723 assert_return(!event_pid_changed(e), -ECHILD);
3724
3725 return e->epoll_fd;
3726 }
3727
3728 _public_ int sd_event_get_state(sd_event *e) {
3729 assert_return(e, -EINVAL);
3730 assert_return(e = event_resolve(e), -ENOPKG);
3731 assert_return(!event_pid_changed(e), -ECHILD);
3732
3733 return e->state;
3734 }
3735
3736 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3737 assert_return(e, -EINVAL);
3738 assert_return(e = event_resolve(e), -ENOPKG);
3739 assert_return(code, -EINVAL);
3740 assert_return(!event_pid_changed(e), -ECHILD);
3741
3742 if (!e->exit_requested)
3743 return -ENODATA;
3744
3745 *code = e->exit_code;
3746 return 0;
3747 }
3748
3749 _public_ int sd_event_exit(sd_event *e, int code) {
3750 assert_return(e, -EINVAL);
3751 assert_return(e = event_resolve(e), -ENOPKG);
3752 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3753 assert_return(!event_pid_changed(e), -ECHILD);
3754
3755 e->exit_requested = true;
3756 e->exit_code = code;
3757
3758 return 0;
3759 }
3760
3761 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3762 assert_return(e, -EINVAL);
3763 assert_return(e = event_resolve(e), -ENOPKG);
3764 assert_return(usec, -EINVAL);
3765 assert_return(!event_pid_changed(e), -ECHILD);
3766
3767 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3768 return -EOPNOTSUPP;
3769
3770         /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
3771          * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
3772          * but for the purpose of getting the time this doesn't matter. */
3773 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3774 return -EOPNOTSUPP;
3775
3776 if (!triple_timestamp_is_set(&e->timestamp)) {
3777 /* Implicitly fall back to now() if we never ran
3778 * before and thus have no cached time. */
3779 *usec = now(clock);
3780 return 1;
3781 }
3782
3783 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3784 return 0;
3785 }
3786
3787 _public_ int sd_event_default(sd_event **ret) {
3788 sd_event *e = NULL;
3789 int r;
3790
3791 if (!ret)
3792 return !!default_event;
3793
3794 if (default_event) {
3795 *ret = sd_event_ref(default_event);
3796 return 0;
3797 }
3798
3799 r = sd_event_new(&e);
3800 if (r < 0)
3801 return r;
3802
3803 e->default_event_ptr = &default_event;
3804 e->tid = gettid();
3805 default_event = e;
3806
3807 *ret = e;
3808 return 1;
3809 }
3810
3811 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3812 assert_return(e, -EINVAL);
3813 assert_return(e = event_resolve(e), -ENOPKG);
3814 assert_return(tid, -EINVAL);
3815 assert_return(!event_pid_changed(e), -ECHILD);
3816
3817 if (e->tid != 0) {
3818 *tid = e->tid;
3819 return 0;
3820 }
3821
3822 return -ENXIO;
3823 }
3824
3825 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3826 int r;
3827
3828 assert_return(e, -EINVAL);
3829 assert_return(e = event_resolve(e), -ENOPKG);
3830 assert_return(!event_pid_changed(e), -ECHILD);
3831
3832 if (e->watchdog == !!b)
3833 return e->watchdog;
3834
3835 if (b) {
3836 r = sd_watchdog_enabled(false, &e->watchdog_period);
3837 if (r <= 0)
3838 return r;
3839
3840 /* Issue first ping immediately */
3841 sd_notify(false, "WATCHDOG=1");
3842 e->watchdog_last = now(CLOCK_MONOTONIC);
3843
3844 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3845 if (e->watchdog_fd < 0)
3846 return -errno;
3847
3848 r = arm_watchdog(e);
3849 if (r < 0)
3850 goto fail;
3851
3852 struct epoll_event ev = {
3853 .events = EPOLLIN,
3854 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3855 };
3856
3857 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3858 if (r < 0) {
3859 r = -errno;
3860 goto fail;
3861 }
3862
3863 } else {
3864 if (e->watchdog_fd >= 0) {
3865 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3866 e->watchdog_fd = safe_close(e->watchdog_fd);
3867 }
3868 }
3869
3870 e->watchdog = !!b;
3871 return e->watchdog;
3872
3873 fail:
3874 e->watchdog_fd = safe_close(e->watchdog_fd);
3875 return r;
3876 }
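
/* Editorial sketch (not part of the original source): a service started with
 * WatchdogSec= set would typically enable loop-driven watchdog pings right after
 * creating its event loop; sd_watchdog_enabled() then picks up the period the service
 * manager passed via $WATCHDOG_USEC. The "example_" name is hypothetical. */
static int example_enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;

        /* r is 0 if the service manager requested no watchdog, > 0 if pings are now
         * being sent from within the event loop */
        return r;
}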
3877
3878 _public_ int sd_event_get_watchdog(sd_event *e) {
3879 assert_return(e, -EINVAL);
3880 assert_return(e = event_resolve(e), -ENOPKG);
3881 assert_return(!event_pid_changed(e), -ECHILD);
3882
3883 return e->watchdog;
3884 }
3885
3886 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3887 assert_return(e, -EINVAL);
3888 assert_return(e = event_resolve(e), -ENOPKG);
3889 assert_return(!event_pid_changed(e), -ECHILD);
3890
3891 *ret = e->iteration;
3892 return 0;
3893 }
3894
3895 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3896 assert_return(s, -EINVAL);
3897
3898 s->destroy_callback = callback;
3899 return 0;
3900 }
3901
3902 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3903 assert_return(s, -EINVAL);
3904
3905 if (ret)
3906 *ret = s->destroy_callback;
3907
3908 return !!s->destroy_callback;
3909 }
3910
3911 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3912 assert_return(s, -EINVAL);
3913
3914 return s->floating;
3915 }
3916
3917 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3918 assert_return(s, -EINVAL);
3919
3920 if (s->floating == !!b)
3921 return 0;
3922
3923 if (!s->event) /* Already disconnected */
3924 return -ESTALE;
3925
3926 s->floating = b;
3927
3928 if (b) {
3929 sd_event_source_ref(s);
3930 sd_event_unref(s->event);
3931 } else {
3932 sd_event_ref(s->event);
3933 sd_event_source_unref(s);
3934 }
3935
3936 return 1;
3937 }
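
/* Editorial sketch (not part of the original source): a floating source is pinned by
 * the event loop instead of by the caller, which is useful for fire-and-forget
 * handlers. Passing a NULL source pointer to the sd_event_add_*() calls creates such a
 * source implicitly; this shows the explicit variant. Names are hypothetical. */
static int example_add_floating_defer(sd_event *e, sd_event_handler_t callback, void *userdata) {
        sd_event_source *s = NULL;
        int r;

        r = sd_event_add_defer(e, &s, callback, userdata);
        if (r < 0)
                return r;

        /* Hand ownership to the loop: it now keeps a reference on the source... */
        r = sd_event_source_set_floating(s, true);
        if (r < 0) {
                sd_event_source_unref(s);
                return r;
        }

        /* ...so the caller's reference can be dropped without destroying the source. */
        sd_event_source_unref(s);
        return 0;
}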