src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
39
40 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43 [SOURCE_TIME_BOOTTIME] = "boottime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
53 [SOURCE_INOTIFY] = "inotify",
54 };
55
56 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
60 struct sd_event {
61 unsigned n_ref;
62
63 int epoll_fd;
64 int watchdog_fd;
65
66 Prioq *pending;
67 Prioq *prepare;
68
69 /* timerfd_create() only supports these five clocks so far. We
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
73 struct clock_data boottime;
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
77
78 usec_t perturb;
79
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
82
83 Hashmap *child_sources;
84 unsigned n_enabled_child_sources;
85
86 Set *post_sources;
87
88 Prioq *exit;
89
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
98 pid_t original_pid;
99
100 uint64_t iteration;
101 triple_timestamp timestamp;
102 int state;
103
104 bool exit_requested:1;
105 bool need_process_child:1;
106 bool watchdog:1;
107 bool profile_delays:1;
108
109 int exit_code;
110
111 pid_t tid;
112 sd_event **default_event_ptr;
113
114 usec_t watchdog_last, watchdog_period;
115
116 unsigned n_sources;
117
118 struct epoll_event *event_queue;
119 size_t event_queue_allocated;
120
121 LIST_HEAD(sd_event_source, sources);
122
123 usec_t last_run, last_log;
124 unsigned delays[sizeof(usec_t) * 8];
125 };
126
127 static thread_local sd_event *default_event = NULL;
128
129 static void source_disconnect(sd_event_source *s);
130 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
131
132 static sd_event *event_resolve(sd_event *e) {
133 return e == SD_EVENT_DEFAULT ? default_event : e;
134 }
135
136 static int pending_prioq_compare(const void *a, const void *b) {
137 const sd_event_source *x = a, *y = b;
138 int r;
139
140 assert(x->pending);
141 assert(y->pending);
142
143 /* Enabled ones first */
144 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
145 return -1;
146 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
147 return 1;
148
149 /* Lower priority values first */
150 r = CMP(x->priority, y->priority);
151 if (r != 0)
152 return r;
153
154 /* Older entries first */
155 return CMP(x->pending_iteration, y->pending_iteration);
156 }
157
158 static int prepare_prioq_compare(const void *a, const void *b) {
159 const sd_event_source *x = a, *y = b;
160 int r;
161
162 assert(x->prepare);
163 assert(y->prepare);
164
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
167 return -1;
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
169 return 1;
170
171 /* Move most recently prepared ones last, so that we can stop
172 * preparing as soon as we hit one that has already been
173 * prepared in the current iteration */
174 r = CMP(x->prepare_iteration, y->prepare_iteration);
175 if (r != 0)
176 return r;
177
178 /* Lower priority values first */
179 return CMP(x->priority, y->priority);
180 }
181
182 static int earliest_time_prioq_compare(const void *a, const void *b) {
183 const sd_event_source *x = a, *y = b;
184
185 assert(EVENT_SOURCE_IS_TIME(x->type));
186 assert(x->type == y->type);
187
188 /* Enabled ones first */
189 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190 return -1;
191 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192 return 1;
193
194 /* Move the pending ones to the end */
195 if (!x->pending && y->pending)
196 return -1;
197 if (x->pending && !y->pending)
198 return 1;
199
200 /* Order by time */
201 return CMP(x->time.next, y->time.next);
202 }
203
204 static usec_t time_event_source_latest(const sd_event_source *s) {
205 return usec_add(s->time.next, s->time.accuracy);
206 }
207
208 static int latest_time_prioq_compare(const void *a, const void *b) {
209 const sd_event_source *x = a, *y = b;
210
211 assert(EVENT_SOURCE_IS_TIME(x->type));
212 assert(x->type == y->type);
213
214 /* Enabled ones first */
215 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
216 return -1;
217 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
218 return 1;
219
220 /* Move the pending ones to the end */
221 if (!x->pending && y->pending)
222 return -1;
223 if (x->pending && !y->pending)
224 return 1;
225
226 /* Order by time */
227 return CMP(time_event_source_latest(x), time_event_source_latest(y));
228 }
229
230 static int exit_prioq_compare(const void *a, const void *b) {
231 const sd_event_source *x = a, *y = b;
232
233 assert(x->type == SOURCE_EXIT);
234 assert(y->type == SOURCE_EXIT);
235
236 /* Enabled ones first */
237 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
238 return -1;
239 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
240 return 1;
241
242 /* Lower priority values first */
243 return CMP(x->priority, y->priority);
244 }
245
246 static void free_clock_data(struct clock_data *d) {
247 assert(d);
248 assert(d->wakeup == WAKEUP_CLOCK_DATA);
249
250 safe_close(d->fd);
251 prioq_free(d->earliest);
252 prioq_free(d->latest);
253 }
254
255 static sd_event *event_free(sd_event *e) {
256 sd_event_source *s;
257
258 assert(e);
259
260 while ((s = e->sources)) {
261 assert(s->floating);
262 source_disconnect(s);
263 sd_event_source_unref(s);
264 }
265
266 assert(e->n_sources == 0);
267
268 if (e->default_event_ptr)
269 *(e->default_event_ptr) = NULL;
270
271 safe_close(e->epoll_fd);
272 safe_close(e->watchdog_fd);
273
274 free_clock_data(&e->realtime);
275 free_clock_data(&e->boottime);
276 free_clock_data(&e->monotonic);
277 free_clock_data(&e->realtime_alarm);
278 free_clock_data(&e->boottime_alarm);
279
280 prioq_free(e->pending);
281 prioq_free(e->prepare);
282 prioq_free(e->exit);
283
284 free(e->signal_sources);
285 hashmap_free(e->signal_data);
286
287 hashmap_free(e->inotify_data);
288
289 hashmap_free(e->child_sources);
290 set_free(e->post_sources);
291
292 free(e->event_queue);
293
294 return mfree(e);
295 }
296
297 _public_ int sd_event_new(sd_event** ret) {
298 sd_event *e;
299 int r;
300
301 assert_return(ret, -EINVAL);
302
303 e = new(sd_event, 1);
304 if (!e)
305 return -ENOMEM;
306
307 *e = (sd_event) {
308 .n_ref = 1,
309 .epoll_fd = -1,
310 .watchdog_fd = -1,
311 .realtime.wakeup = WAKEUP_CLOCK_DATA,
312 .realtime.fd = -1,
313 .realtime.next = USEC_INFINITY,
314 .boottime.wakeup = WAKEUP_CLOCK_DATA,
315 .boottime.fd = -1,
316 .boottime.next = USEC_INFINITY,
317 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
318 .monotonic.fd = -1,
319 .monotonic.next = USEC_INFINITY,
320 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
321 .realtime_alarm.fd = -1,
322 .realtime_alarm.next = USEC_INFINITY,
323 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
324 .boottime_alarm.fd = -1,
325 .boottime_alarm.next = USEC_INFINITY,
326 .perturb = USEC_INFINITY,
327 .original_pid = getpid_cached(),
328 };
329
330 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
331 if (r < 0)
332 goto fail;
333
334 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
335 if (e->epoll_fd < 0) {
336 r = -errno;
337 goto fail;
338 }
339
340 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
341
342 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
343 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
344 e->profile_delays = true;
345 }
346
347 *ret = e;
348 return 0;
349
350 fail:
351 event_free(e);
352 return r;
353 }
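
/* Illustrative usage sketch (not part of this file): allocating and releasing a loop with the function
 * above. Variable names and error handling are examples only; many callers instead use the per-thread
 * default loop via sd_event_default().
 *
 *     #include <systemd/sd-event.h>
 *
 *     int make_loop(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);             // allocates the loop and its epoll fd, refcount starts at 1
 *             if (r < 0)
 *                     return r;
 *
 *             // ... attach event sources, then run sd_event_loop(e) ...
 *
 *             e = sd_event_unref(e);            // drops the reference, returns NULL
 *             return 0;
 *     }
 */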
354
355 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
356
357 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
358 if (s)
359 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
360 return sd_event_source_unref(s);
361 }
362
363 static bool event_pid_changed(sd_event *e) {
364 assert(e);
365
366 /* We don't support people creating an event loop and keeping
367 * it around over a fork(). Let's complain. */
368
369 return e->original_pid != getpid_cached();
370 }
371
372 static void source_io_unregister(sd_event_source *s) {
373 assert(s);
374 assert(s->type == SOURCE_IO);
375
376 if (event_pid_changed(s->event))
377 return;
378
379 if (!s->io.registered)
380 return;
381
382 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
383 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
384 strna(s->description), event_source_type_to_string(s->type));
385
386 s->io.registered = false;
387 }
388
389 static int source_io_register(
390 sd_event_source *s,
391 int enabled,
392 uint32_t events) {
393
394 assert(s);
395 assert(s->type == SOURCE_IO);
396 assert(enabled != SD_EVENT_OFF);
397
398 struct epoll_event ev = {
399 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
400 .data.ptr = s,
401 };
402 int r;
403
404 r = epoll_ctl(s->event->epoll_fd,
405 s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
406 s->io.fd,
407 &ev);
408 if (r < 0)
409 return -errno;
410
411 s->io.registered = true;
412
413 return 0;
414 }
415
416 static void source_child_pidfd_unregister(sd_event_source *s) {
417 assert(s);
418 assert(s->type == SOURCE_CHILD);
419
420 if (event_pid_changed(s->event))
421 return;
422
423 if (!s->child.registered)
424 return;
425
426 if (EVENT_SOURCE_WATCH_PIDFD(s))
427 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
428 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
429 strna(s->description), event_source_type_to_string(s->type));
430
431 s->child.registered = false;
432 }
433
434 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
435 int r;
436
437 assert(s);
438 assert(s->type == SOURCE_CHILD);
439 assert(enabled != SD_EVENT_OFF);
440
441 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
442 struct epoll_event ev = {
443 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
444 .data.ptr = s,
445 };
446
447 if (s->child.registered)
448 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
449 else
450 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
451 if (r < 0)
452 return -errno;
453 }
454
455 s->child.registered = true;
456 return 0;
457 }
458
459 static clockid_t event_source_type_to_clock(EventSourceType t) {
460
461 switch (t) {
462
463 case SOURCE_TIME_REALTIME:
464 return CLOCK_REALTIME;
465
466 case SOURCE_TIME_BOOTTIME:
467 return CLOCK_BOOTTIME;
468
469 case SOURCE_TIME_MONOTONIC:
470 return CLOCK_MONOTONIC;
471
472 case SOURCE_TIME_REALTIME_ALARM:
473 return CLOCK_REALTIME_ALARM;
474
475 case SOURCE_TIME_BOOTTIME_ALARM:
476 return CLOCK_BOOTTIME_ALARM;
477
478 default:
479 return (clockid_t) -1;
480 }
481 }
482
483 static EventSourceType clock_to_event_source_type(clockid_t clock) {
484
485 switch (clock) {
486
487 case CLOCK_REALTIME:
488 return SOURCE_TIME_REALTIME;
489
490 case CLOCK_BOOTTIME:
491 return SOURCE_TIME_BOOTTIME;
492
493 case CLOCK_MONOTONIC:
494 return SOURCE_TIME_MONOTONIC;
495
496 case CLOCK_REALTIME_ALARM:
497 return SOURCE_TIME_REALTIME_ALARM;
498
499 case CLOCK_BOOTTIME_ALARM:
500 return SOURCE_TIME_BOOTTIME_ALARM;
501
502 default:
503 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
504 }
505 }
506
507 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
508 assert(e);
509
510 switch (t) {
511
512 case SOURCE_TIME_REALTIME:
513 return &e->realtime;
514
515 case SOURCE_TIME_BOOTTIME:
516 return &e->boottime;
517
518 case SOURCE_TIME_MONOTONIC:
519 return &e->monotonic;
520
521 case SOURCE_TIME_REALTIME_ALARM:
522 return &e->realtime_alarm;
523
524 case SOURCE_TIME_BOOTTIME_ALARM:
525 return &e->boottime_alarm;
526
527 default:
528 return NULL;
529 }
530 }
531
532 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
533 assert(e);
534
535 if (!d)
536 return;
537
538 hashmap_remove(e->signal_data, &d->priority);
539 safe_close(d->fd);
540 free(d);
541 }
542
543 static int event_make_signal_data(
544 sd_event *e,
545 int sig,
546 struct signal_data **ret) {
547
548 struct signal_data *d;
549 bool added = false;
550 sigset_t ss_copy;
551 int64_t priority;
552 int r;
553
554 assert(e);
555
556 if (event_pid_changed(e))
557 return -ECHILD;
558
559 if (e->signal_sources && e->signal_sources[sig])
560 priority = e->signal_sources[sig]->priority;
561 else
562 priority = SD_EVENT_PRIORITY_NORMAL;
563
564 d = hashmap_get(e->signal_data, &priority);
565 if (d) {
566 if (sigismember(&d->sigset, sig) > 0) {
567 if (ret)
568 *ret = d;
569 return 0;
570 }
571 } else {
572 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
573 if (r < 0)
574 return r;
575
576 d = new(struct signal_data, 1);
577 if (!d)
578 return -ENOMEM;
579
580 *d = (struct signal_data) {
581 .wakeup = WAKEUP_SIGNAL_DATA,
582 .fd = -1,
583 .priority = priority,
584 };
585
586 r = hashmap_put(e->signal_data, &d->priority, d);
587 if (r < 0) {
588 free(d);
589 return r;
590 }
591
592 added = true;
593 }
594
595 ss_copy = d->sigset;
596 assert_se(sigaddset(&ss_copy, sig) >= 0);
597
598 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
599 if (r < 0) {
600 r = -errno;
601 goto fail;
602 }
603
604 d->sigset = ss_copy;
605
606 if (d->fd >= 0) {
607 if (ret)
608 *ret = d;
609 return 0;
610 }
611
612 d->fd = fd_move_above_stdio(r);
613
614 struct epoll_event ev = {
615 .events = EPOLLIN,
616 .data.ptr = d,
617 };
618
619 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
620 if (r < 0) {
621 r = -errno;
622 goto fail;
623 }
624
625 if (ret)
626 *ret = d;
627
628 return 0;
629
630 fail:
631 if (added)
632 event_free_signal_data(e, d);
633
634 return r;
635 }
636
637 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
638 assert(e);
639 assert(d);
640
641 /* Turns off the specified signal in the signal data
642 * object. If the signal mask of the object becomes empty that
643 * way, we remove the object. */
644
645 if (sigismember(&d->sigset, sig) == 0)
646 return;
647
648 assert_se(sigdelset(&d->sigset, sig) >= 0);
649
650 if (sigisemptyset(&d->sigset)) {
651 /* If the mask is all-zero we can get rid of the structure */
652 event_free_signal_data(e, d);
653 return;
654 }
655
656 assert(d->fd >= 0);
657
658 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
659 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
660 }
661
662 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
663 struct signal_data *d;
664 static const int64_t zero_priority = 0;
665
666 assert(e);
667
668 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
669 * and possibly drop the signalfd for it. */
670
671 if (sig == SIGCHLD &&
672 e->n_enabled_child_sources > 0)
673 return;
674
675 if (e->signal_sources &&
676 e->signal_sources[sig] &&
677 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
678 return;
679
680 /*
681 * The specified signal might be enabled in three different queues:
682 *
683 * 1) the one that belongs to the priority passed (if it is non-NULL)
684 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
685 * 3) the 0 priority (to cover the SIGCHLD case)
686 *
687 * Hence, let's remove it from all three here.
688 */
689
690 if (priority) {
691 d = hashmap_get(e->signal_data, priority);
692 if (d)
693 event_unmask_signal_data(e, d, sig);
694 }
695
696 if (e->signal_sources && e->signal_sources[sig]) {
697 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
698 if (d)
699 event_unmask_signal_data(e, d, sig);
700 }
701
702 d = hashmap_get(e->signal_data, &zero_priority);
703 if (d)
704 event_unmask_signal_data(e, d, sig);
705 }
706
707 static void source_disconnect(sd_event_source *s) {
708 sd_event *event;
709
710 assert(s);
711
712 if (!s->event)
713 return;
714
715 assert(s->event->n_sources > 0);
716
717 switch (s->type) {
718
719 case SOURCE_IO:
720 if (s->io.fd >= 0)
721 source_io_unregister(s);
722
723 break;
724
725 case SOURCE_TIME_REALTIME:
726 case SOURCE_TIME_BOOTTIME:
727 case SOURCE_TIME_MONOTONIC:
728 case SOURCE_TIME_REALTIME_ALARM:
729 case SOURCE_TIME_BOOTTIME_ALARM: {
730 struct clock_data *d;
731
732 d = event_get_clock_data(s->event, s->type);
733 assert(d);
734
735 prioq_remove(d->earliest, s, &s->time.earliest_index);
736 prioq_remove(d->latest, s, &s->time.latest_index);
737 d->needs_rearm = true;
738 break;
739 }
740
741 case SOURCE_SIGNAL:
742 if (s->signal.sig > 0) {
743
744 if (s->event->signal_sources)
745 s->event->signal_sources[s->signal.sig] = NULL;
746
747 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
748 }
749
750 break;
751
752 case SOURCE_CHILD:
753 if (s->child.pid > 0) {
754 if (s->enabled != SD_EVENT_OFF) {
755 assert(s->event->n_enabled_child_sources > 0);
756 s->event->n_enabled_child_sources--;
757 }
758
759 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
760 }
761
762 if (EVENT_SOURCE_WATCH_PIDFD(s))
763 source_child_pidfd_unregister(s);
764 else
765 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
766
767 break;
768
769 case SOURCE_DEFER:
770 /* nothing */
771 break;
772
773 case SOURCE_POST:
774 set_remove(s->event->post_sources, s);
775 break;
776
777 case SOURCE_EXIT:
778 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
779 break;
780
781 case SOURCE_INOTIFY: {
782 struct inode_data *inode_data;
783
784 inode_data = s->inotify.inode_data;
785 if (inode_data) {
786 struct inotify_data *inotify_data;
787 assert_se(inotify_data = inode_data->inotify_data);
788
789 /* Detach this event source from the inode object */
790 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
791 s->inotify.inode_data = NULL;
792
793 if (s->pending) {
794 assert(inotify_data->n_pending > 0);
795 inotify_data->n_pending--;
796 }
797
798 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
799 * continues to be watched. That's because inotify doesn't really have an API for that: we
800 * can only change watch masks with access to the original inode either by fd or by path. But
801 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
802 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
803 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
804 * there), but given the need for open_by_handle_at() which is privileged and not universally
805 * available this would be quite an incomplete solution. Hence we go the other way, leave the
806 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
807 * anymore after reception. Yes, this sucks, but … Linux … */
808
809 /* Maybe release the inode data (and its inotify) */
810 event_gc_inode_data(s->event, inode_data);
811 }
812
813 break;
814 }
815
816 default:
817 assert_not_reached("Wut? I shouldn't exist.");
818 }
819
820 if (s->pending)
821 prioq_remove(s->event->pending, s, &s->pending_index);
822
823 if (s->prepare)
824 prioq_remove(s->event->prepare, s, &s->prepare_index);
825
826 event = TAKE_PTR(s->event);
827 LIST_REMOVE(sources, event->sources, s);
828 event->n_sources--;
829
830 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
831 * pidfd associated with this event source, which we'll do only on source_free(). */
832
833 if (!s->floating)
834 sd_event_unref(event);
835 }
836
837 static void source_free(sd_event_source *s) {
838 assert(s);
839
840 source_disconnect(s);
841
842 if (s->type == SOURCE_IO && s->io.owned)
843 s->io.fd = safe_close(s->io.fd);
844
845 if (s->type == SOURCE_CHILD) {
846 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
847
848 if (s->child.process_owned) {
849
850 if (!s->child.exited) {
851 bool sent = false;
852
853 if (s->child.pidfd >= 0) {
854 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
855 if (errno == ESRCH) /* Already dead */
856 sent = true;
857 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
858 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
859 s->child.pid);
860 } else
861 sent = true;
862 }
863
864 if (!sent)
865 if (kill(s->child.pid, SIGKILL) < 0)
866 if (errno != ESRCH) /* Already dead */
867 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
868 s->child.pid);
869 }
870
871 if (!s->child.waited) {
872 siginfo_t si = {};
873
874 /* Reap the child if we can */
875 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
876 }
877 }
878
879 if (s->child.pidfd_owned)
880 s->child.pidfd = safe_close(s->child.pidfd);
881 }
882
883 if (s->destroy_callback)
884 s->destroy_callback(s->userdata);
885
886 free(s->description);
887 free(s);
888 }
889 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
890
891 static int source_set_pending(sd_event_source *s, bool b) {
892 int r;
893
894 assert(s);
895 assert(s->type != SOURCE_EXIT);
896
897 if (s->pending == b)
898 return 0;
899
900 s->pending = b;
901
902 if (b) {
903 s->pending_iteration = s->event->iteration;
904
905 r = prioq_put(s->event->pending, s, &s->pending_index);
906 if (r < 0) {
907 s->pending = false;
908 return r;
909 }
910 } else
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
912
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
915
916 d = event_get_clock_data(s->event, s->type);
917 assert(d);
918
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
922 }
923
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
926
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
929 d->current = NULL;
930 }
931
932 if (s->type == SOURCE_INOTIFY) {
933
934 assert(s->inotify.inode_data);
935 assert(s->inotify.inode_data->inotify_data);
936
937 if (b)
938 s->inotify.inode_data->inotify_data->n_pending++;
939 else {
940 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
941 s->inotify.inode_data->inotify_data->n_pending--;
942 }
943 }
944
945 return 0;
946 }
947
948 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
949 sd_event_source *s;
950
951 assert(e);
952
953 s = new(sd_event_source, 1);
954 if (!s)
955 return NULL;
956
957 *s = (struct sd_event_source) {
958 .n_ref = 1,
959 .event = e,
960 .floating = floating,
961 .type = type,
962 .pending_index = PRIOQ_IDX_NULL,
963 .prepare_index = PRIOQ_IDX_NULL,
964 };
965
966 if (!floating)
967 sd_event_ref(e);
968
969 LIST_PREPEND(sources, e->sources, s);
970 e->n_sources++;
971
972 return s;
973 }
974
975 _public_ int sd_event_add_io(
976 sd_event *e,
977 sd_event_source **ret,
978 int fd,
979 uint32_t events,
980 sd_event_io_handler_t callback,
981 void *userdata) {
982
983 _cleanup_(source_freep) sd_event_source *s = NULL;
984 int r;
985
986 assert_return(e, -EINVAL);
987 assert_return(e = event_resolve(e), -ENOPKG);
988 assert_return(fd >= 0, -EBADF);
989 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
990 assert_return(callback, -EINVAL);
991 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992 assert_return(!event_pid_changed(e), -ECHILD);
993
994 s = source_new(e, !ret, SOURCE_IO);
995 if (!s)
996 return -ENOMEM;
997
998 s->wakeup = WAKEUP_EVENT_SOURCE;
999 s->io.fd = fd;
1000 s->io.events = events;
1001 s->io.callback = callback;
1002 s->userdata = userdata;
1003 s->enabled = SD_EVENT_ON;
1004
1005 r = source_io_register(s, s->enabled, events);
1006 if (r < 0)
1007 return r;
1008
1009 if (ret)
1010 *ret = s;
1011 TAKE_PTR(s);
1012
1013 return 0;
1014 }
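
/* Illustrative usage sketch (not part of this file) for sd_event_add_io(). The callback name, fd
 * variable and userdata are examples only. Passing NULL for the source pointer makes the source
 * "floating", i.e. owned by the event loop itself (see source_new() above).
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n;
 *
 *             n = read(fd, buf, sizeof(buf));                  // we registered for EPOLLIN below
 *             if (n < 0)
 *                     return errno == EAGAIN ? 0 : -errno;     // a negative return is treated as a callback failure
 *
 *             return 0;
 *     }
 *
 *     // during setup, with 'e' an sd_event* and 'some_fd' a readable, non-blocking fd:
 *     r = sd_event_add_io(e, NULL, some_fd, EPOLLIN, on_io, NULL);
 *     if (r < 0)
 *             return r;
 */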
1015
1016 static void initialize_perturb(sd_event *e) {
1017 sd_id128_t bootid = {};
1018
1019 /* When we sleep for longer, we try to realign the wakeup to
1020 the same time within each minute/second/250ms, so that
1021 events all across the system can be coalesced into a single
1022 CPU wakeup. However, let's take some system-specific
1023 randomness for this value, so that in a network of systems
1024 with synced clocks timer events are distributed a
1025 bit. Here, we calculate a perturbation usec offset from the
1026 boot ID. */
1027
1028 if (_likely_(e->perturb != USEC_INFINITY))
1029 return;
1030
1031 if (sd_id128_get_boot(&bootid) >= 0)
1032 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1033 }
1034
1035 static int event_setup_timer_fd(
1036 sd_event *e,
1037 struct clock_data *d,
1038 clockid_t clock) {
1039
1040 assert(e);
1041 assert(d);
1042
1043 if (_likely_(d->fd >= 0))
1044 return 0;
1045
1046 _cleanup_close_ int fd = -1;
1047 int r;
1048
1049 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1050 if (fd < 0)
1051 return -errno;
1052
1053 fd = fd_move_above_stdio(fd);
1054
1055 struct epoll_event ev = {
1056 .events = EPOLLIN,
1057 .data.ptr = d,
1058 };
1059
1060 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1061 if (r < 0)
1062 return -errno;
1063
1064 d->fd = TAKE_FD(fd);
1065 return 0;
1066 }
1067
1068 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1069 assert(s);
1070
1071 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1072 }
1073
1074 _public_ int sd_event_add_time(
1075 sd_event *e,
1076 sd_event_source **ret,
1077 clockid_t clock,
1078 uint64_t usec,
1079 uint64_t accuracy,
1080 sd_event_time_handler_t callback,
1081 void *userdata) {
1082
1083 EventSourceType type;
1084 _cleanup_(source_freep) sd_event_source *s = NULL;
1085 struct clock_data *d;
1086 int r;
1087
1088 assert_return(e, -EINVAL);
1089 assert_return(e = event_resolve(e), -ENOPKG);
1090 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1091 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1092 assert_return(!event_pid_changed(e), -ECHILD);
1093
1094 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1095 return -EOPNOTSUPP;
1096
1097 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1098 if (type < 0)
1099 return -EOPNOTSUPP;
1100
1101 if (!callback)
1102 callback = time_exit_callback;
1103
1104 d = event_get_clock_data(e, type);
1105 assert(d);
1106
1107 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1108 if (r < 0)
1109 return r;
1110
1111 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1112 if (r < 0)
1113 return r;
1114
1115 if (d->fd < 0) {
1116 r = event_setup_timer_fd(e, d, clock);
1117 if (r < 0)
1118 return r;
1119 }
1120
1121 s = source_new(e, !ret, type);
1122 if (!s)
1123 return -ENOMEM;
1124
1125 s->time.next = usec;
1126 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1127 s->time.callback = callback;
1128 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1129 s->userdata = userdata;
1130 s->enabled = SD_EVENT_ONESHOT;
1131
1132 d->needs_rearm = true;
1133
1134 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1135 if (r < 0)
1136 return r;
1137
1138 r = prioq_put(d->latest, s, &s->time.latest_index);
1139 if (r < 0)
1140 return r;
1141
1142 if (ret)
1143 *ret = s;
1144 TAKE_PTR(s);
1145
1146 return 0;
1147 }
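
/* Illustrative usage sketch (not part of this file): a one-shot relative timer built on the
 * absolute-time API above. The handler name is an example only. Note that 'usec' is an absolute
 * timestamp on the given clock, that an accuracy of 0 selects DEFAULT_ACCURACY_USEC (250ms), and that
 * the source is created in SD_EVENT_ONESHOT mode.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);   // stop the loop cleanly
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *
 *     // fire roughly five seconds from now, with the default accuracy window
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 *     if (r < 0)
 *             return r;
 */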
1148
1149 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1150 assert(s);
1151
1152 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1153 }
1154
1155 _public_ int sd_event_add_signal(
1156 sd_event *e,
1157 sd_event_source **ret,
1158 int sig,
1159 sd_event_signal_handler_t callback,
1160 void *userdata) {
1161
1162 _cleanup_(source_freep) sd_event_source *s = NULL;
1163 struct signal_data *d;
1164 int r;
1165
1166 assert_return(e, -EINVAL);
1167 assert_return(e = event_resolve(e), -ENOPKG);
1168 assert_return(SIGNAL_VALID(sig), -EINVAL);
1169 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1170 assert_return(!event_pid_changed(e), -ECHILD);
1171
1172 if (!callback)
1173 callback = signal_exit_callback;
1174
1175 r = signal_is_blocked(sig);
1176 if (r < 0)
1177 return r;
1178 if (r == 0)
1179 return -EBUSY;
1180
1181 if (!e->signal_sources) {
1182 e->signal_sources = new0(sd_event_source*, _NSIG);
1183 if (!e->signal_sources)
1184 return -ENOMEM;
1185 } else if (e->signal_sources[sig])
1186 return -EBUSY;
1187
1188 s = source_new(e, !ret, SOURCE_SIGNAL);
1189 if (!s)
1190 return -ENOMEM;
1191
1192 s->signal.sig = sig;
1193 s->signal.callback = callback;
1194 s->userdata = userdata;
1195 s->enabled = SD_EVENT_ON;
1196
1197 e->signal_sources[sig] = s;
1198
1199 r = event_make_signal_data(e, sig, &d);
1200 if (r < 0)
1201 return r;
1202
1203 /* Use the signal name as description for the event source by default */
1204 (void) sd_event_source_set_description(s, signal_to_string(sig));
1205
1206 if (ret)
1207 *ret = s;
1208 TAKE_PTR(s);
1209
1210 return 0;
1211 }
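
/* Illustrative usage sketch (not part of this file) for sd_event_add_signal(). As enforced by the
 * signal_is_blocked() check, the caller must block the signal in all threads before adding the source.
 * With a NULL callback the default handler (signal_exit_callback() above) exits the loop with the
 * userdata pointer, cast to int, as return code.
 *
 *     sigset_t mask;
 *
 *     assert_se(sigemptyset(&mask) >= 0);
 *     assert_se(sigaddset(&mask, SIGTERM) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);     // do this before spawning any threads
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);   // NULL callback: exit the loop with code 0
 *     if (r < 0)
 *             return r;
 */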
1212
1213 static bool shall_use_pidfd(void) {
1214 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1215 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1216 }
1217
1218 _public_ int sd_event_add_child(
1219 sd_event *e,
1220 sd_event_source **ret,
1221 pid_t pid,
1222 int options,
1223 sd_event_child_handler_t callback,
1224 void *userdata) {
1225
1226 _cleanup_(source_freep) sd_event_source *s = NULL;
1227 int r;
1228
1229 assert_return(e, -EINVAL);
1230 assert_return(e = event_resolve(e), -ENOPKG);
1231 assert_return(pid > 1, -EINVAL);
1232 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1233 assert_return(options != 0, -EINVAL);
1234 assert_return(callback, -EINVAL);
1235 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1236 assert_return(!event_pid_changed(e), -ECHILD);
1237
1238 if (e->n_enabled_child_sources == 0) {
1239 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1240 * for compatibility with pre-pidfd and because we want to reap the child processes
1241 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1242 * take effect.
1243 *
1244 * (As an optimization we only do this check on the first child event source created.) */
1245 r = signal_is_blocked(SIGCHLD);
1246 if (r < 0)
1247 return r;
1248 if (r == 0)
1249 return -EBUSY;
1250 }
1251
1252 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1253 if (r < 0)
1254 return r;
1255
1256 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1257 return -EBUSY;
1258
1259 s = source_new(e, !ret, SOURCE_CHILD);
1260 if (!s)
1261 return -ENOMEM;
1262
1263 s->wakeup = WAKEUP_EVENT_SOURCE;
1264 s->child.pid = pid;
1265 s->child.options = options;
1266 s->child.callback = callback;
1267 s->userdata = userdata;
1268 s->enabled = SD_EVENT_ONESHOT;
1269
1270 /* We always take a pidfd here if we can, even if we wait for something other than WEXITED, so that we
1271 * pin the PID, and make regular waitid() handling race-free. */
1272
1273 if (shall_use_pidfd()) {
1274 s->child.pidfd = pidfd_open(s->child.pid, 0);
1275 if (s->child.pidfd < 0) {
1276 /* Propagate errors unless the syscall is not supported or blocked */
1277 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1278 return -errno;
1279 } else
1280 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1281 } else
1282 s->child.pidfd = -1;
1283
1284 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1285 if (r < 0)
1286 return r;
1287
1288 e->n_enabled_child_sources++;
1289
1290 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1291 /* We have a pidfd and we only want to watch for exit */
1292
1293 r = source_child_pidfd_register(s, s->enabled);
1294 if (r < 0) {
1295 e->n_enabled_child_sources--;
1296 return r;
1297 }
1298 } else {
1299 /* We have no pidfd, or we shall wait for some event other than WEXITED */
1300
1301 r = event_make_signal_data(e, SIGCHLD, NULL);
1302 if (r < 0) {
1303 e->n_enabled_child_sources--;
1304 return r;
1305 }
1306
1307 e->need_process_child = true;
1308 }
1309
1310 if (ret)
1311 *ret = s;
1312
1313 TAKE_PTR(s);
1314 return 0;
1315 }
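
/* Illustrative usage sketch (not part of this file): watching a forked child with the API above. The
 * handler name and pid variable are examples only. SIGCHLD must already be blocked (see the check at
 * the top of the function), and the source is created in SD_EVENT_ONESHOT mode.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // for WEXITED, si->si_code and si->si_status describe how the child terminated
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL);
 *     if (r < 0)
 *             return r;
 */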
1316
1317 _public_ int sd_event_add_child_pidfd(
1318 sd_event *e,
1319 sd_event_source **ret,
1320 int pidfd,
1321 int options,
1322 sd_event_child_handler_t callback,
1323 void *userdata) {
1324
1325
1326 _cleanup_(source_freep) sd_event_source *s = NULL;
1327 pid_t pid;
1328 int r;
1329
1330 assert_return(e, -EINVAL);
1331 assert_return(e = event_resolve(e), -ENOPKG);
1332 assert_return(pidfd >= 0, -EBADF);
1333 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1334 assert_return(options != 0, -EINVAL);
1335 assert_return(callback, -EINVAL);
1336 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1337 assert_return(!event_pid_changed(e), -ECHILD);
1338
1339 if (e->n_enabled_child_sources == 0) {
1340 r = signal_is_blocked(SIGCHLD);
1341 if (r < 0)
1342 return r;
1343 if (r == 0)
1344 return -EBUSY;
1345 }
1346
1347 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1348 if (r < 0)
1349 return r;
1350
1351 r = pidfd_get_pid(pidfd, &pid);
1352 if (r < 0)
1353 return r;
1354
1355 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1356 return -EBUSY;
1357
1358 s = source_new(e, !ret, SOURCE_CHILD);
1359 if (!s)
1360 return -ENOMEM;
1361
1362 s->wakeup = WAKEUP_EVENT_SOURCE;
1363 s->child.pidfd = pidfd;
1364 s->child.pid = pid;
1365 s->child.options = options;
1366 s->child.callback = callback;
1367 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1368 s->userdata = userdata;
1369 s->enabled = SD_EVENT_ONESHOT;
1370
1371 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1372 if (r < 0)
1373 return r;
1374
1375 e->n_enabled_child_sources++;
1376
1377 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1378 /* We only want to watch for WEXITED */
1379
1380 r = source_child_pidfd_register(s, s->enabled);
1381 if (r < 0) {
1382 e->n_enabled_child_sources--;
1383 return r;
1384 }
1385 } else {
1386 /* We shall wait for some event other than WEXITED */
1387
1388 r = event_make_signal_data(e, SIGCHLD, NULL);
1389 if (r < 0) {
1390 e->n_enabled_child_sources--;
1391 return r;
1392 }
1393
1394 e->need_process_child = true;
1395 }
1396
1397 if (ret)
1398 *ret = s;
1399
1400 TAKE_PTR(s);
1401 return 0;
1402 }
1403
1404 _public_ int sd_event_add_defer(
1405 sd_event *e,
1406 sd_event_source **ret,
1407 sd_event_handler_t callback,
1408 void *userdata) {
1409
1410 _cleanup_(source_freep) sd_event_source *s = NULL;
1411 int r;
1412
1413 assert_return(e, -EINVAL);
1414 assert_return(e = event_resolve(e), -ENOPKG);
1415 assert_return(callback, -EINVAL);
1416 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1417 assert_return(!event_pid_changed(e), -ECHILD);
1418
1419 s = source_new(e, !ret, SOURCE_DEFER);
1420 if (!s)
1421 return -ENOMEM;
1422
1423 s->defer.callback = callback;
1424 s->userdata = userdata;
1425 s->enabled = SD_EVENT_ONESHOT;
1426
1427 r = source_set_pending(s, true);
1428 if (r < 0)
1429 return r;
1430
1431 if (ret)
1432 *ret = s;
1433 TAKE_PTR(s);
1434
1435 return 0;
1436 }
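
/* Illustrative usage sketch (not part of this file) for sd_event_add_defer(). A defer source is marked
 * pending immediately, so it is dispatched on the next loop iteration; since it is created in
 * SD_EVENT_ONESHOT mode it then disables itself unless re-enabled. The handler name is an example only.
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // runs on the next event loop iteration
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, on_defer, NULL);
 *     if (r < 0)
 *             return r;
 */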
1437
1438 _public_ int sd_event_add_post(
1439 sd_event *e,
1440 sd_event_source **ret,
1441 sd_event_handler_t callback,
1442 void *userdata) {
1443
1444 _cleanup_(source_freep) sd_event_source *s = NULL;
1445 int r;
1446
1447 assert_return(e, -EINVAL);
1448 assert_return(e = event_resolve(e), -ENOPKG);
1449 assert_return(callback, -EINVAL);
1450 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1451 assert_return(!event_pid_changed(e), -ECHILD);
1452
1453 r = set_ensure_allocated(&e->post_sources, NULL);
1454 if (r < 0)
1455 return r;
1456
1457 s = source_new(e, !ret, SOURCE_POST);
1458 if (!s)
1459 return -ENOMEM;
1460
1461 s->post.callback = callback;
1462 s->userdata = userdata;
1463 s->enabled = SD_EVENT_ON;
1464
1465 r = set_put(e->post_sources, s);
1466 if (r < 0)
1467 return r;
1468
1469 if (ret)
1470 *ret = s;
1471 TAKE_PTR(s);
1472
1473 return 0;
1474 }
1475
1476 _public_ int sd_event_add_exit(
1477 sd_event *e,
1478 sd_event_source **ret,
1479 sd_event_handler_t callback,
1480 void *userdata) {
1481
1482 _cleanup_(source_freep) sd_event_source *s = NULL;
1483 int r;
1484
1485 assert_return(e, -EINVAL);
1486 assert_return(e = event_resolve(e), -ENOPKG);
1487 assert_return(callback, -EINVAL);
1488 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1489 assert_return(!event_pid_changed(e), -ECHILD);
1490
1491 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1492 if (r < 0)
1493 return r;
1494
1495 s = source_new(e, !ret, SOURCE_EXIT);
1496 if (!s)
1497 return -ENOMEM;
1498
1499 s->exit.callback = callback;
1500 s->userdata = userdata;
1501 s->exit.prioq_index = PRIOQ_IDX_NULL;
1502 s->enabled = SD_EVENT_ONESHOT;
1503
1504 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1505 if (r < 0)
1506 return r;
1507
1508 if (ret)
1509 *ret = s;
1510 TAKE_PTR(s);
1511
1512 return 0;
1513 }
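
/* Illustrative usage sketch (not part of this file) for sd_event_add_exit(). Exit sources run only
 * once sd_event_exit() has been called, in the order given by exit_prioq_compare(), i.e. lower
 * priority values first; they are typically used for cleanup work. The handler name is an example only.
 *
 *     static int on_exit_source(sd_event_source *s, void *userdata) {
 *             // release resources before sd_event_loop() returns
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_exit_source, NULL);
 *     if (r < 0)
 *             return r;
 */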
1514
1515 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1516 assert(e);
1517
1518 if (!d)
1519 return;
1520
1521 assert(hashmap_isempty(d->inodes));
1522 assert(hashmap_isempty(d->wd));
1523
1524 if (d->buffer_filled > 0)
1525 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1526
1527 hashmap_free(d->inodes);
1528 hashmap_free(d->wd);
1529
1530 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1531
1532 if (d->fd >= 0) {
1533 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1534 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1535
1536 safe_close(d->fd);
1537 }
1538 free(d);
1539 }
1540
1541 static int event_make_inotify_data(
1542 sd_event *e,
1543 int64_t priority,
1544 struct inotify_data **ret) {
1545
1546 _cleanup_close_ int fd = -1;
1547 struct inotify_data *d;
1548 int r;
1549
1550 assert(e);
1551
1552 d = hashmap_get(e->inotify_data, &priority);
1553 if (d) {
1554 if (ret)
1555 *ret = d;
1556 return 0;
1557 }
1558
1559 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1560 if (fd < 0)
1561 return -errno;
1562
1563 fd = fd_move_above_stdio(fd);
1564
1565 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1566 if (r < 0)
1567 return r;
1568
1569 d = new(struct inotify_data, 1);
1570 if (!d)
1571 return -ENOMEM;
1572
1573 *d = (struct inotify_data) {
1574 .wakeup = WAKEUP_INOTIFY_DATA,
1575 .fd = TAKE_FD(fd),
1576 .priority = priority,
1577 };
1578
1579 r = hashmap_put(e->inotify_data, &d->priority, d);
1580 if (r < 0) {
1581 d->fd = safe_close(d->fd);
1582 free(d);
1583 return r;
1584 }
1585
1586 struct epoll_event ev = {
1587 .events = EPOLLIN,
1588 .data.ptr = d,
1589 };
1590
1591 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1592 r = -errno;
1593 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1594 * remove the fd from the epoll first, which we don't want as we couldn't
1595 * add it in the first place. */
1596 event_free_inotify_data(e, d);
1597 return r;
1598 }
1599
1600 if (ret)
1601 *ret = d;
1602
1603 return 1;
1604 }
1605
1606 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1607 int r;
1608
1609 assert(x);
1610 assert(y);
1611
1612 r = CMP(x->dev, y->dev);
1613 if (r != 0)
1614 return r;
1615
1616 return CMP(x->ino, y->ino);
1617 }
1618
1619 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1620 assert(d);
1621
1622 siphash24_compress(&d->dev, sizeof(d->dev), state);
1623 siphash24_compress(&d->ino, sizeof(d->ino), state);
1624 }
1625
1626 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1627
1628 static void event_free_inode_data(
1629 sd_event *e,
1630 struct inode_data *d) {
1631
1632 assert(e);
1633
1634 if (!d)
1635 return;
1636
1637 assert(!d->event_sources);
1638
1639 if (d->fd >= 0) {
1640 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1641 safe_close(d->fd);
1642 }
1643
1644 if (d->inotify_data) {
1645
1646 if (d->wd >= 0) {
1647 if (d->inotify_data->fd >= 0) {
1648 /* So here's a problem. At the time this runs the watch descriptor might already be
1649 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1650 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1651 * likely case to happen. */
1652
1653 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1654 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1655 }
1656
1657 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1658 }
1659
1660 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1661 }
1662
1663 free(d);
1664 }
1665
1666 static void event_gc_inode_data(
1667 sd_event *e,
1668 struct inode_data *d) {
1669
1670 struct inotify_data *inotify_data;
1671
1672 assert(e);
1673
1674 if (!d)
1675 return;
1676
1677 if (d->event_sources)
1678 return;
1679
1680 inotify_data = d->inotify_data;
1681 event_free_inode_data(e, d);
1682
1683 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1684 event_free_inotify_data(e, inotify_data);
1685 }
1686
1687 static int event_make_inode_data(
1688 sd_event *e,
1689 struct inotify_data *inotify_data,
1690 dev_t dev,
1691 ino_t ino,
1692 struct inode_data **ret) {
1693
1694 struct inode_data *d, key;
1695 int r;
1696
1697 assert(e);
1698 assert(inotify_data);
1699
1700 key = (struct inode_data) {
1701 .ino = ino,
1702 .dev = dev,
1703 };
1704
1705 d = hashmap_get(inotify_data->inodes, &key);
1706 if (d) {
1707 if (ret)
1708 *ret = d;
1709
1710 return 0;
1711 }
1712
1713 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1714 if (r < 0)
1715 return r;
1716
1717 d = new(struct inode_data, 1);
1718 if (!d)
1719 return -ENOMEM;
1720
1721 *d = (struct inode_data) {
1722 .dev = dev,
1723 .ino = ino,
1724 .wd = -1,
1725 .fd = -1,
1726 .inotify_data = inotify_data,
1727 };
1728
1729 r = hashmap_put(inotify_data->inodes, d, d);
1730 if (r < 0) {
1731 free(d);
1732 return r;
1733 }
1734
1735 if (ret)
1736 *ret = d;
1737
1738 return 1;
1739 }
1740
1741 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1742 bool excl_unlink = true;
1743 uint32_t combined = 0;
1744 sd_event_source *s;
1745
1746 assert(d);
1747
1748 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1749 * the IN_EXCL_UNLINK flag is ANDed instead.
1750 *
1751 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1752 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1753 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1754 * events we don't care for client-side. */
1755
1756 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1757
1758 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1759 excl_unlink = false;
1760
1761 combined |= s->inotify.mask;
1762 }
1763
1764 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1765 }
1766
1767 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1768 uint32_t combined_mask;
1769 int wd, r;
1770
1771 assert(d);
1772 assert(d->fd >= 0);
1773
1774 combined_mask = inode_data_determine_mask(d);
1775
1776 if (d->wd >= 0 && combined_mask == d->combined_mask)
1777 return 0;
1778
1779 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1780 if (r < 0)
1781 return r;
1782
1783 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1784 if (wd < 0)
1785 return wd;
1786
1787 if (d->wd < 0) {
1788 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1789 if (r < 0) {
1790 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1791 return r;
1792 }
1793
1794 d->wd = wd;
1795
1796 } else if (d->wd != wd) {
1797
1798 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1799 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1800 return -EINVAL;
1801 }
1802
1803 d->combined_mask = combined_mask;
1804 return 1;
1805 }
1806
1807 _public_ int sd_event_add_inotify(
1808 sd_event *e,
1809 sd_event_source **ret,
1810 const char *path,
1811 uint32_t mask,
1812 sd_event_inotify_handler_t callback,
1813 void *userdata) {
1814
1815 struct inotify_data *inotify_data = NULL;
1816 struct inode_data *inode_data = NULL;
1817 _cleanup_close_ int fd = -1;
1818 _cleanup_(source_freep) sd_event_source *s = NULL;
1819 struct stat st;
1820 int r;
1821
1822 assert_return(e, -EINVAL);
1823 assert_return(e = event_resolve(e), -ENOPKG);
1824 assert_return(path, -EINVAL);
1825 assert_return(callback, -EINVAL);
1826 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1827 assert_return(!event_pid_changed(e), -ECHILD);
1828
1829 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1830 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1831 * the user can't pass it in themselves. */
1832 if (mask & IN_MASK_ADD)
1833 return -EINVAL;
1834
1835 fd = open(path, O_PATH|O_CLOEXEC|
1836 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1837 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1838 if (fd < 0)
1839 return -errno;
1840
1841 if (fstat(fd, &st) < 0)
1842 return -errno;
1843
1844 s = source_new(e, !ret, SOURCE_INOTIFY);
1845 if (!s)
1846 return -ENOMEM;
1847
1848 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1849 s->inotify.mask = mask;
1850 s->inotify.callback = callback;
1851 s->userdata = userdata;
1852
1853 /* Allocate an inotify object for this priority, and an inode object within it */
1854 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1855 if (r < 0)
1856 return r;
1857
1858 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1859 if (r < 0) {
1860 event_free_inotify_data(e, inotify_data);
1861 return r;
1862 }
1863
1864 /* Keep the O_PATH fd around until the first iteration of the loop, so that until then we can still change
1865 * the priority of the event source, for which we need the original inode. */
1866 if (inode_data->fd < 0) {
1867 inode_data->fd = TAKE_FD(fd);
1868 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1869 }
1870
1871 /* Link our event source to the inode data object */
1872 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1873 s->inotify.inode_data = inode_data;
1874
1875 /* Actually realize the watch now */
1876 r = inode_data_realize_watch(e, inode_data);
1877 if (r < 0)
1878 return r;
1879
1880 (void) sd_event_source_set_description(s, path);
1881
1882 if (ret)
1883 *ret = s;
1884 TAKE_PTR(s);
1885
1886 return 0;
1887 }
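
/* Illustrative usage sketch (not part of this file) for sd_event_add_inotify(). The path and handler
 * name are examples only. Watches on the same inode are coalesced within a priority (see
 * inode_data_determine_mask() above), which is why IN_MASK_ADD is refused here.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *event, void *userdata) {
 *             // 'event' is the raw inotify event; check event->mask for what happened
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/etc/hostname", IN_MODIFY|IN_MOVE_SELF|IN_DELETE_SELF, on_inotify, NULL);
 *     if (r < 0)
 *             return r;
 */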
1888
1889 static sd_event_source* event_source_free(sd_event_source *s) {
1890 if (!s)
1891 return NULL;
1892
1893 /* Here's a special hack: when we are called from a
1894 * dispatch handler we won't free the event source
1895 * immediately, but we will detach the fd from the
1896 * epoll. This way it is safe for the caller to unref
1897 * the event source and immediately close the fd, but
1898 * we still retain a valid event source object after
1899 * the callback. */
1900
1901 if (s->dispatching) {
1902 if (s->type == SOURCE_IO)
1903 source_io_unregister(s);
1904
1905 source_disconnect(s);
1906 } else
1907 source_free(s);
1908
1909 return NULL;
1910 }
1911
1912 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1913
1914 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1915 assert_return(s, -EINVAL);
1916 assert_return(!event_pid_changed(s->event), -ECHILD);
1917
1918 return free_and_strdup(&s->description, description);
1919 }
1920
1921 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1922 assert_return(s, -EINVAL);
1923 assert_return(description, -EINVAL);
1924 assert_return(!event_pid_changed(s->event), -ECHILD);
1925
1926 if (!s->description)
1927 return -ENXIO;
1928
1929 *description = s->description;
1930 return 0;
1931 }
1932
1933 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1934 assert_return(s, NULL);
1935
1936 return s->event;
1937 }
1938
1939 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1940 assert_return(s, -EINVAL);
1941 assert_return(s->type != SOURCE_EXIT, -EDOM);
1942 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1943 assert_return(!event_pid_changed(s->event), -ECHILD);
1944
1945 return s->pending;
1946 }
1947
1948 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1949 assert_return(s, -EINVAL);
1950 assert_return(s->type == SOURCE_IO, -EDOM);
1951 assert_return(!event_pid_changed(s->event), -ECHILD);
1952
1953 return s->io.fd;
1954 }
1955
1956 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1957 int r;
1958
1959 assert_return(s, -EINVAL);
1960 assert_return(fd >= 0, -EBADF);
1961 assert_return(s->type == SOURCE_IO, -EDOM);
1962 assert_return(!event_pid_changed(s->event), -ECHILD);
1963
1964 if (s->io.fd == fd)
1965 return 0;
1966
1967 if (s->enabled == SD_EVENT_OFF) {
1968 s->io.fd = fd;
1969 s->io.registered = false;
1970 } else {
1971 int saved_fd;
1972
1973 saved_fd = s->io.fd;
1974 assert(s->io.registered);
1975
1976 s->io.fd = fd;
1977 s->io.registered = false;
1978
1979 r = source_io_register(s, s->enabled, s->io.events);
1980 if (r < 0) {
1981 s->io.fd = saved_fd;
1982 s->io.registered = true;
1983 return r;
1984 }
1985
1986 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1987 }
1988
1989 return 0;
1990 }
1991
1992 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1993 assert_return(s, -EINVAL);
1994 assert_return(s->type == SOURCE_IO, -EDOM);
1995
1996 return s->io.owned;
1997 }
1998
1999 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2000 assert_return(s, -EINVAL);
2001 assert_return(s->type == SOURCE_IO, -EDOM);
2002
2003 s->io.owned = own;
2004 return 0;
2005 }
2006
2007 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2008 assert_return(s, -EINVAL);
2009 assert_return(events, -EINVAL);
2010 assert_return(s->type == SOURCE_IO, -EDOM);
2011 assert_return(!event_pid_changed(s->event), -ECHILD);
2012
2013 *events = s->io.events;
2014 return 0;
2015 }
2016
2017 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2018 int r;
2019
2020 assert_return(s, -EINVAL);
2021 assert_return(s->type == SOURCE_IO, -EDOM);
2022 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2023 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2024 assert_return(!event_pid_changed(s->event), -ECHILD);
2025
2026 /* edge-triggered updates are never skipped, so we can reset edges */
2027 if (s->io.events == events && !(events & EPOLLET))
2028 return 0;
2029
2030 r = source_set_pending(s, false);
2031 if (r < 0)
2032 return r;
2033
2034 if (s->enabled != SD_EVENT_OFF) {
2035 r = source_io_register(s, s->enabled, events);
2036 if (r < 0)
2037 return r;
2038 }
2039
2040 s->io.events = events;
2041
2042 return 0;
2043 }
2044
2045 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2046 assert_return(s, -EINVAL);
2047 assert_return(revents, -EINVAL);
2048 assert_return(s->type == SOURCE_IO, -EDOM);
2049 assert_return(s->pending, -ENODATA);
2050 assert_return(!event_pid_changed(s->event), -ECHILD);
2051
2052 *revents = s->io.revents;
2053 return 0;
2054 }
2055
2056 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2057 assert_return(s, -EINVAL);
2058 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2059 assert_return(!event_pid_changed(s->event), -ECHILD);
2060
2061 return s->signal.sig;
2062 }
2063
2064 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2065 assert_return(s, -EINVAL);
2066 assert_return(!event_pid_changed(s->event), -ECHILD);
2067
2068 *priority = s->priority;
2069 return 0;
2070 }
2071
2072 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2073 bool rm_inotify = false, rm_inode = false;
2074 struct inotify_data *new_inotify_data = NULL;
2075 struct inode_data *new_inode_data = NULL;
2076 int r;
2077
2078 assert_return(s, -EINVAL);
2079 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2080 assert_return(!event_pid_changed(s->event), -ECHILD);
2081
2082 if (s->priority == priority)
2083 return 0;
2084
2085 if (s->type == SOURCE_INOTIFY) {
2086 struct inode_data *old_inode_data;
2087
2088 assert(s->inotify.inode_data);
2089 old_inode_data = s->inotify.inode_data;
2090
2091 /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2092 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2093 * events we allow priority changes only until the first following iteration. */
2094 if (old_inode_data->fd < 0)
2095 return -EOPNOTSUPP;
2096
2097 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2098 if (r < 0)
2099 return r;
2100 rm_inotify = r > 0;
2101
2102 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2103 if (r < 0)
2104 goto fail;
2105 rm_inode = r > 0;
2106
2107 if (new_inode_data->fd < 0) {
2108 /* Duplicate the fd for the new inode object if we don't have any yet */
2109 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2110 if (new_inode_data->fd < 0) {
2111 r = -errno;
2112 goto fail;
2113 }
2114
2115 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2116 }
2117
2118 /* Move the event source to the new inode data structure */
2119 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2120 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2121 s->inotify.inode_data = new_inode_data;
2122
2123 /* Now create the new watch */
2124 r = inode_data_realize_watch(s->event, new_inode_data);
2125 if (r < 0) {
2126 /* Move it back */
2127 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2128 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2129 s->inotify.inode_data = old_inode_data;
2130 goto fail;
2131 }
2132
2133 s->priority = priority;
2134
2135 event_gc_inode_data(s->event, old_inode_data);
2136
2137 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2138 struct signal_data *old, *d;
2139
2140 /* Move us from the signalfd belonging to the old
2141 * priority to the signalfd of the new priority */
2142
2143 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2144
2145 s->priority = priority;
2146
2147 r = event_make_signal_data(s->event, s->signal.sig, &d);
2148 if (r < 0) {
2149 s->priority = old->priority;
2150 return r;
2151 }
2152
2153 event_unmask_signal_data(s->event, old, s->signal.sig);
2154 } else
2155 s->priority = priority;
2156
2157 if (s->pending)
2158 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2159
2160 if (s->prepare)
2161 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2162
2163 if (s->type == SOURCE_EXIT)
2164 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2165
2166 return 0;
2167
2168 fail:
2169 if (rm_inode)
2170 event_free_inode_data(s->event, new_inode_data);
2171
2172 if (rm_inotify)
2173 event_free_inotify_data(s->event, new_inotify_data);
2174
2175 return r;
2176 }
2177
2178 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2179 assert_return(s, -EINVAL);
2180 assert_return(!event_pid_changed(s->event), -ECHILD);
2181
2182 if (m)
2183 *m = s->enabled;
2184 return s->enabled != SD_EVENT_OFF;
2185 }
2186
2187 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2188 int r;
2189
2190 assert_return(s, -EINVAL);
2191 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2192 assert_return(!event_pid_changed(s->event), -ECHILD);
2193
2194 /* If we are dead anyway, we are fine with turning off
2195 * sources, but everything else needs to fail. */
2196 if (s->event->state == SD_EVENT_FINISHED)
2197 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2198
2199 if (s->enabled == m)
2200 return 0;
2201
2202 if (m == SD_EVENT_OFF) {
2203
2204 /* Unset the pending flag when this event source is disabled */
2205 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2206 r = source_set_pending(s, false);
2207 if (r < 0)
2208 return r;
2209 }
2210
2211 switch (s->type) {
2212
2213 case SOURCE_IO:
2214 source_io_unregister(s);
2215 s->enabled = m;
2216 break;
2217
2218 case SOURCE_TIME_REALTIME:
2219 case SOURCE_TIME_BOOTTIME:
2220 case SOURCE_TIME_MONOTONIC:
2221 case SOURCE_TIME_REALTIME_ALARM:
2222 case SOURCE_TIME_BOOTTIME_ALARM: {
2223 struct clock_data *d;
2224
2225 s->enabled = m;
2226 d = event_get_clock_data(s->event, s->type);
2227 assert(d);
2228
2229 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2230 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2231 d->needs_rearm = true;
2232 break;
2233 }
2234
2235 case SOURCE_SIGNAL:
2236 s->enabled = m;
2237
2238 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2239 break;
2240
2241 case SOURCE_CHILD:
2242 s->enabled = m;
2243
2244 assert(s->event->n_enabled_child_sources > 0);
2245 s->event->n_enabled_child_sources--;
2246
2247 if (EVENT_SOURCE_WATCH_PIDFD(s))
2248 source_child_pidfd_unregister(s);
2249 else
2250 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2251
2252 break;
2253
2254 case SOURCE_EXIT:
2255 s->enabled = m;
2256 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2257 break;
2258
2259 case SOURCE_DEFER:
2260 case SOURCE_POST:
2261 case SOURCE_INOTIFY:
2262 s->enabled = m;
2263 break;
2264
2265 default:
2266 assert_not_reached("Wut? I shouldn't exist.");
2267 }
2268
2269 } else {
2270
2271 /* Unset the pending flag when this event source is enabled */
2272 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2273 r = source_set_pending(s, false);
2274 if (r < 0)
2275 return r;
2276 }
2277
2278 switch (s->type) {
2279
2280 case SOURCE_IO:
2281 r = source_io_register(s, m, s->io.events);
2282 if (r < 0)
2283 return r;
2284
2285 s->enabled = m;
2286 break;
2287
2288 case SOURCE_TIME_REALTIME:
2289 case SOURCE_TIME_BOOTTIME:
2290 case SOURCE_TIME_MONOTONIC:
2291 case SOURCE_TIME_REALTIME_ALARM:
2292 case SOURCE_TIME_BOOTTIME_ALARM: {
2293 struct clock_data *d;
2294
2295 s->enabled = m;
2296 d = event_get_clock_data(s->event, s->type);
2297 assert(d);
2298
2299 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2300 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2301 d->needs_rearm = true;
2302 break;
2303 }
2304
2305 case SOURCE_SIGNAL:
2306
2307 s->enabled = m;
2308
2309 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2310 if (r < 0) {
2311 s->enabled = SD_EVENT_OFF;
2312 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2313 return r;
2314 }
2315
2316 break;
2317
2318 case SOURCE_CHILD:
2319
2320 if (s->enabled == SD_EVENT_OFF)
2321 s->event->n_enabled_child_sources++;
2322
2323 s->enabled = m;
2324
2325 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2326 /* yes, we have pidfd */
2327
2328 r = source_child_pidfd_register(s, s->enabled);
2329 if (r < 0) {
2330 s->enabled = SD_EVENT_OFF;
2331 s->event->n_enabled_child_sources--;
2332 return r;
2333 }
2334 } else {
2335 /* No pidfd, or we are watching for something other than WEXITED */
2336
2337 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2338 if (r < 0) {
2339 s->enabled = SD_EVENT_OFF;
2340 s->event->n_enabled_child_sources--;
2341 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2342 return r;
2343 }
2344 }
2345
2346 break;
2347
2348 case SOURCE_EXIT:
2349 s->enabled = m;
2350 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2351 break;
2352
2353 case SOURCE_DEFER:
2354 case SOURCE_POST:
2355 case SOURCE_INOTIFY:
2356 s->enabled = m;
2357 break;
2358
2359 default:
2360 assert_not_reached("Wut? I shouldn't exist.");
2361 }
2362 }
2363
2364 if (s->pending)
2365 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2366
2367 if (s->prepare)
2368 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2369
2370 return 0;
2371 }
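
/* Illustrative usage sketch for sd_event_source_set_enabled() (not part of the library); the
 * variable name "io_source" is hypothetical:
 *
 *     // Stop watching the fd while our outgoing queue is full ...
 *     r = sd_event_source_set_enabled(io_source, SD_EVENT_OFF);
 *     if (r < 0)
 *             return r;
 *
 *     // ... and resume watching it once there is room again.
 *     r = sd_event_source_set_enabled(io_source, SD_EVENT_ON);
 *
 * Note that SD_EVENT_ONESHOT behaves like SD_EVENT_ON, except that source_dispatch() below
 * resets the source to SD_EVENT_OFF right before invoking its handler. */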
2372
2373 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2374 assert_return(s, -EINVAL);
2375 assert_return(usec, -EINVAL);
2376 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2377 assert_return(!event_pid_changed(s->event), -ECHILD);
2378
2379 *usec = s->time.next;
2380 return 0;
2381 }
2382
2383 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2384 struct clock_data *d;
2385 int r;
2386
2387 assert_return(s, -EINVAL);
2388 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2389 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2390 assert_return(!event_pid_changed(s->event), -ECHILD);
2391
2392 r = source_set_pending(s, false);
2393 if (r < 0)
2394 return r;
2395
2396 s->time.next = usec;
2397
2398 d = event_get_clock_data(s->event, s->type);
2399 assert(d);
2400
2401 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2402 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2403 d->needs_rearm = true;
2404
2405 return 0;
2406 }
2407
2408 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2409 assert_return(s, -EINVAL);
2410 assert_return(usec, -EINVAL);
2411 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2412 assert_return(!event_pid_changed(s->event), -ECHILD);
2413
2414 *usec = s->time.accuracy;
2415 return 0;
2416 }
2417
2418 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2419 struct clock_data *d;
2420 int r;
2421
2422 assert_return(s, -EINVAL);
2423 assert_return(usec != (uint64_t) -1, -EINVAL);
2424 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2425 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2426 assert_return(!event_pid_changed(s->event), -ECHILD);
2427
2428 r = source_set_pending(s, false);
2429 if (r < 0)
2430 return r;
2431
2432 if (usec == 0)
2433 usec = DEFAULT_ACCURACY_USEC;
2434
2435 s->time.accuracy = usec;
2436
2437 d = event_get_clock_data(s->event, s->type);
2438 assert(d);
2439
2440 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2441 d->needs_rearm = true;
2442
2443 return 0;
2444 }
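
/* Worked example with hypothetical numbers: for a timer source with s->time.next = T and
 * s->time.accuracy = 250ms (the default, see DEFAULT_ACCURACY_USEC), the handler may be
 * dispatched anywhere in the window [T, T + 250ms]. event_arm_timer() below peeks the
 * per-clock "earliest" and "latest" priority queues and asks sleep_between() for a single
 * wakeup time inside the combined window, so that several timers with overlapping windows
 * can be served by one timerfd expiry. */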
2445
2446 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2447 assert_return(s, -EINVAL);
2448 assert_return(clock, -EINVAL);
2449 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2450 assert_return(!event_pid_changed(s->event), -ECHILD);
2451
2452 *clock = event_source_type_to_clock(s->type);
2453 return 0;
2454 }
2455
2456 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2457 assert_return(s, -EINVAL);
2458 assert_return(pid, -EINVAL);
2459 assert_return(s->type == SOURCE_CHILD, -EDOM);
2460 assert_return(!event_pid_changed(s->event), -ECHILD);
2461
2462 *pid = s->child.pid;
2463 return 0;
2464 }
2465
2466 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2467 assert_return(s, -EINVAL);
2468 assert_return(s->type == SOURCE_CHILD, -EDOM);
2469 assert_return(!event_pid_changed(s->event), -ECHILD);
2470
2471 if (s->child.pidfd < 0)
2472 return -EOPNOTSUPP;
2473
2474 return s->child.pidfd;
2475 }
2476
2477 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2478 assert_return(s, -EINVAL);
2479 assert_return(s->type == SOURCE_CHILD, -EDOM);
2480 assert_return(!event_pid_changed(s->event), -ECHILD);
2481 assert_return(SIGNAL_VALID(sig), -EINVAL);
2482
2483 /* If we have already seen an indication that the process exited, refuse to send a signal early. This
2484 * way we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2485 * available. */
2486 if (s->child.exited)
2487 return -ESRCH;
2488
2489 if (s->child.pidfd >= 0) {
2490 siginfo_t copy;
2491
2492 /* pidfd_send_signal() modifies the siginfo_t argument. This is weird, hence let's copy the
2493 * structure here. */
2494 if (si)
2495 copy = *si;
2496
2497 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2498 /* Propagate the error, unless the system call is not implemented or prohibited, in which case we fall back to the classic code path below */
2499 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2500 return -errno;
2501 } else
2502 return 0;
2503 }
2504
2505 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2506 * this here. */
2507 if (flags != 0)
2508 return -EOPNOTSUPP;
2509
2510 if (si) {
2511 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2512 siginfo_t copy = *si;
2513
2514 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2515 return -errno;
2516 } else if (kill(s->child.pid, sig) < 0)
2517 return -errno;
2518
2519 return 0;
2520 }
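
/* Illustrative usage sketch for sd_event_source_send_child_signal() (not part of the
 * library), assuming an existing SOURCE_CHILD event source; the variable name is
 * hypothetical:
 *
 *     // Ask the worker to terminate. This transparently prefers pidfd_send_signal() and
 *     // falls back to rt_sigqueueinfo()/kill(), returning -ESRCH if the child is already
 *     // known to have exited.
 *     r = sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
 */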
2521
2522 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2523 assert_return(s, -EINVAL);
2524 assert_return(s->type == SOURCE_CHILD, -EDOM);
2525
2526 if (s->child.pidfd < 0)
2527 return -EOPNOTSUPP;
2528
2529 return s->child.pidfd_owned;
2530 }
2531
2532 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2533 assert_return(s, -EINVAL);
2534 assert_return(s->type == SOURCE_CHILD, -EDOM);
2535
2536 if (s->child.pidfd < 0)
2537 return -EOPNOTSUPP;
2538
2539 s->child.pidfd_owned = own;
2540 return 0;
2541 }
2542
2543 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2544 assert_return(s, -EINVAL);
2545 assert_return(s->type == SOURCE_CHILD, -EDOM);
2546
2547 return s->child.process_owned;
2548 }
2549
2550 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2551 assert_return(s, -EINVAL);
2552 assert_return(s->type == SOURCE_CHILD, -EDOM);
2553
2554 s->child.process_owned = own;
2555 return 0;
2556 }
2557
2558 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2559 assert_return(s, -EINVAL);
2560 assert_return(mask, -EINVAL);
2561 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2562 assert_return(!event_pid_changed(s->event), -ECHILD);
2563
2564 *mask = s->inotify.mask;
2565 return 0;
2566 }
2567
2568 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2569 int r;
2570
2571 assert_return(s, -EINVAL);
2572 assert_return(s->type != SOURCE_EXIT, -EDOM);
2573 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2574 assert_return(!event_pid_changed(s->event), -ECHILD);
2575
2576 if (s->prepare == callback)
2577 return 0;
2578
2579 if (callback && s->prepare) {
2580 s->prepare = callback;
2581 return 0;
2582 }
2583
2584 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2585 if (r < 0)
2586 return r;
2587
2588 s->prepare = callback;
2589
2590 if (callback) {
2591 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2592 if (r < 0)
2593 return r;
2594 } else
2595 prioq_remove(s->event->prepare, s, &s->prepare_index);
2596
2597 return 0;
2598 }
2599
2600 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2601 assert_return(s, NULL);
2602
2603 return s->userdata;
2604 }
2605
2606 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2607 void *ret;
2608
2609 assert_return(s, NULL);
2610
2611 ret = s->userdata;
2612 s->userdata = userdata;
2613
2614 return ret;
2615 }
2616
2617 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2618 usec_t c;
2619 assert(e);
2620 assert(a <= b);
2621
2622 if (a <= 0)
2623 return 0;
2624 if (a >= USEC_INFINITY)
2625 return USEC_INFINITY;
2626
2627 if (b <= a + 1)
2628 return a;
2629
2630 initialize_perturb(e);
2631
2632 /*
2633 Find a good time to wake up again between times a and b. We
2634 have two goals here:
2635
2636 a) We want to wake up as seldom as possible, hence prefer
2637 later times over earlier times.
2638
2639 b) But if we have to wake up, then let's make sure to
2640 dispatch as much as possible on the entire system.
2641
2642 We implement this by waking up everywhere at the same time
2643 within any given minute if we can, synchronised via the
2644 perturbation value determined from the boot ID. If we can't,
2645 then we try to find the same spot within every 10s, then every 1s and
2646 finally every 250ms. Otherwise, we pick the last possible time
2647 to wake up.
2648 */
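
/* Worked example with hypothetical numbers: say this machine's perturbation value (derived
 * from the boot ID) is 17s, and we are asked to sleep between a = 12:00:05 and b = 12:00:40.
 * The first round rounds b down to the start of its minute and adds the perturbation,
 * yielding c = 12:00:17; that lies within [a, b], so we return it, and every loop on this
 * machine whose window covers 12:00:17 wakes up at the very same instant. Only if c fell
 * outside [a, b] would we retry at 10s, 1s and 250ms granularity, and finally fall back to
 * b itself. */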
2649
2650 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2651 if (c >= b) {
2652 if (_unlikely_(c < USEC_PER_MINUTE))
2653 return b;
2654
2655 c -= USEC_PER_MINUTE;
2656 }
2657
2658 if (c >= a)
2659 return c;
2660
2661 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2662 if (c >= b) {
2663 if (_unlikely_(c < USEC_PER_SEC*10))
2664 return b;
2665
2666 c -= USEC_PER_SEC*10;
2667 }
2668
2669 if (c >= a)
2670 return c;
2671
2672 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2673 if (c >= b) {
2674 if (_unlikely_(c < USEC_PER_SEC))
2675 return b;
2676
2677 c -= USEC_PER_SEC;
2678 }
2679
2680 if (c >= a)
2681 return c;
2682
2683 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2684 if (c >= b) {
2685 if (_unlikely_(c < USEC_PER_MSEC*250))
2686 return b;
2687
2688 c -= USEC_PER_MSEC*250;
2689 }
2690
2691 if (c >= a)
2692 return c;
2693
2694 return b;
2695 }
2696
2697 static int event_arm_timer(
2698 sd_event *e,
2699 struct clock_data *d) {
2700
2701 struct itimerspec its = {};
2702 sd_event_source *a, *b;
2703 usec_t t;
2704 int r;
2705
2706 assert(e);
2707 assert(d);
2708
2709 if (!d->needs_rearm)
2710 return 0;
2711 else
2712 d->needs_rearm = false;
2713
2714 a = prioq_peek(d->earliest);
2715 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2716
2717 if (d->fd < 0)
2718 return 0;
2719
2720 if (d->next == USEC_INFINITY)
2721 return 0;
2722
2723 /* disarm */
2724 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2725 if (r < 0)
2726 return r;
2727
2728 d->next = USEC_INFINITY;
2729 return 0;
2730 }
2731
2732 b = prioq_peek(d->latest);
2733 assert_se(b && b->enabled != SD_EVENT_OFF);
2734
2735 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2736 if (d->next == t)
2737 return 0;
2738
2739 assert_se(d->fd >= 0);
2740
2741 if (t == 0) {
2742 /* We don't want to disarm here, so instead we arm the timer for a time long in the past, making it fire right away. */
2743 its.it_value.tv_sec = 0;
2744 its.it_value.tv_nsec = 1;
2745 } else
2746 timespec_store(&its.it_value, t);
2747
2748 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2749 if (r < 0)
2750 return -errno;
2751
2752 d->next = t;
2753 return 0;
2754 }
2755
2756 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2757 assert(e);
2758 assert(s);
2759 assert(s->type == SOURCE_IO);
2760
2761 /* If the event source was already pending, we just OR in the
2762 * new revents, otherwise we reset the value. The ORing is
2763 * necessary to handle EPOLLONESHOT events properly where
2764 * readability might happen independently of writability, and
2765 * we need to keep track of both */
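
/* Hypothetical sequence: a first wakeup reports EPOLLIN, so s->io.revents = EPOLLIN and the
 * source becomes pending; before it is dispatched a later wakeup reports only EPOLLOUT.
 * Since the source is still pending we OR the values and end up with
 * s->io.revents = EPOLLIN|EPOLLOUT, instead of losing the readability bit. */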
2766
2767 if (s->pending)
2768 s->io.revents |= revents;
2769 else
2770 s->io.revents = revents;
2771
2772 return source_set_pending(s, true);
2773 }
2774
2775 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2776 uint64_t x;
2777 ssize_t ss;
2778
2779 assert(e);
2780 assert(fd >= 0);
2781
2782 assert_return(events == EPOLLIN, -EIO);
2783
2784 ss = read(fd, &x, sizeof(x));
2785 if (ss < 0) {
2786 if (IN_SET(errno, EAGAIN, EINTR))
2787 return 0;
2788
2789 return -errno;
2790 }
2791
2792 if (_unlikely_(ss != sizeof(x)))
2793 return -EIO;
2794
2795 if (next)
2796 *next = USEC_INFINITY;
2797
2798 return 0;
2799 }
2800
2801 static int process_timer(
2802 sd_event *e,
2803 usec_t n,
2804 struct clock_data *d) {
2805
2806 sd_event_source *s;
2807 int r;
2808
2809 assert(e);
2810 assert(d);
2811
2812 for (;;) {
2813 s = prioq_peek(d->earliest);
2814 if (!s ||
2815 s->time.next > n ||
2816 s->enabled == SD_EVENT_OFF ||
2817 s->pending)
2818 break;
2819
2820 r = source_set_pending(s, true);
2821 if (r < 0)
2822 return r;
2823
2824 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2825 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2826 d->needs_rearm = true;
2827 }
2828
2829 return 0;
2830 }
2831
2832 static int process_child(sd_event *e) {
2833 sd_event_source *s;
2834 Iterator i;
2835 int r;
2836
2837 assert(e);
2838
2839 e->need_process_child = false;
2840
2841 /*
2842 So, this is ugly. We iteratively invoke waitid() with P_PID
2843 + WNOHANG for each PID we wait for, instead of using
2844 P_ALL. This is because we only want to get child
2845 information of very specific child processes, and not all
2846 of them. We might not have processed the SIGCHLD event of a
2847 previous invocation and we don't want to maintain an
2848 unbounded *per-child* event queue, hence we really don't
2849 want anything flushed out of the kernel's queue that we
2850 don't care about. Since this is O(n) this means that if you
2851 have a lot of processes you probably want to handle SIGCHLD
2852 yourself.
2853
2854 We do not reap the children here (by using WNOWAIT), this
2855 is only done after the event source is dispatched so that
2856 the callback still sees the process as a zombie.
2857 */
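
/* For illustration: with s->child.options == WEXITED the call below expands to
 * waitid(P_PID, pid, &si, WNOHANG|WNOWAIT|WEXITED), i.e. it polls exactly one child,
 * returns immediately if there is nothing to report, and leaves a dead child as a zombie so
 * that the dispatch callback can still inspect it; the reaping waitid() without WNOWAIT is
 * issued later, in source_dispatch(), after the callback ran. */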
2858
2859 HASHMAP_FOREACH(s, e->child_sources, i) {
2860 assert(s->type == SOURCE_CHILD);
2861
2862 if (s->pending)
2863 continue;
2864
2865 if (s->enabled == SD_EVENT_OFF)
2866 continue;
2867
2868 if (s->child.exited)
2869 continue;
2870
2871 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? Then don't waitid() for it here */
2872 continue;
2873
2874 zero(s->child.siginfo);
2875 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2876 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2877 if (r < 0)
2878 return -errno;
2879
2880 if (s->child.siginfo.si_pid != 0) {
2881 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2882
2883 if (zombie)
2884 s->child.exited = true;
2885
2886 if (!zombie && (s->child.options & WEXITED)) {
2887 /* If the child isn't dead then let's
2888 * immediately remove the state change
2889 * from the queue, since there's no
2890 * benefit in leaving it queued */
2891
2892 assert(s->child.options & (WSTOPPED|WCONTINUED));
2893 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2894 }
2895
2896 r = source_set_pending(s, true);
2897 if (r < 0)
2898 return r;
2899 }
2900 }
2901
2902 return 0;
2903 }
2904
2905 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2906 assert(e);
2907 assert(s);
2908 assert(s->type == SOURCE_CHILD);
2909
2910 if (s->pending)
2911 return 0;
2912
2913 if (s->enabled == SD_EVENT_OFF)
2914 return 0;
2915
2916 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2917 return 0;
2918
2919 zero(s->child.siginfo);
2920 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2921 return -errno;
2922
2923 if (s->child.siginfo.si_pid == 0)
2924 return 0;
2925
2926 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2927 s->child.exited = true;
2928
2929 return source_set_pending(s, true);
2930 }
2931
2932 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2933 bool read_one = false;
2934 int r;
2935
2936 assert(e);
2937 assert(d);
2938 assert_return(events == EPOLLIN, -EIO);
2939
2940 /* If there's a signal queued on this priority and SIGCHLD is
2941 on this priority too, then make sure to recheck the
2942 children we watch. This is because we only ever dequeue
2943 the first signal per priority: if we dequeue one and a
2944 SIGCHLD is queued up behind it we wouldn't notice it, even
2945 though we might care about higher-priority children, hence
2946 we need to check for them explicitly. */
2947
2948 if (sigismember(&d->sigset, SIGCHLD))
2949 e->need_process_child = true;
2950
2951 /* If there's already an event source pending for this
2952 * priority we don't read another */
2953 if (d->current)
2954 return 0;
2955
2956 for (;;) {
2957 struct signalfd_siginfo si;
2958 ssize_t n;
2959 sd_event_source *s = NULL;
2960
2961 n = read(d->fd, &si, sizeof(si));
2962 if (n < 0) {
2963 if (IN_SET(errno, EAGAIN, EINTR))
2964 return read_one;
2965
2966 return -errno;
2967 }
2968
2969 if (_unlikely_(n != sizeof(si)))
2970 return -EIO;
2971
2972 assert(SIGNAL_VALID(si.ssi_signo));
2973
2974 read_one = true;
2975
2976 if (e->signal_sources)
2977 s = e->signal_sources[si.ssi_signo];
2978 if (!s)
2979 continue;
2980 if (s->pending)
2981 continue;
2982
2983 s->signal.siginfo = si;
2984 d->current = s;
2985
2986 r = source_set_pending(s, true);
2987 if (r < 0)
2988 return r;
2989
2990 return 1;
2991 }
2992 }
2993
2994 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2995 ssize_t n;
2996
2997 assert(e);
2998 assert(d);
2999
3000 assert_return(revents == EPOLLIN, -EIO);
3001
3002 /* If there's already an event source pending for this priority, don't read another */
3003 if (d->n_pending > 0)
3004 return 0;
3005
3006 /* Is the read buffer non-empty? If so, let's not read more */
3007 if (d->buffer_filled > 0)
3008 return 0;
3009
3010 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3011 if (n < 0) {
3012 if (IN_SET(errno, EAGAIN, EINTR))
3013 return 0;
3014
3015 return -errno;
3016 }
3017
3018 assert(n > 0);
3019 d->buffer_filled = (size_t) n;
3020 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3021
3022 return 1;
3023 }
3024
3025 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3026 assert(e);
3027 assert(d);
3028 assert(sz <= d->buffer_filled);
3029
3030 if (sz == 0)
3031 return;
3032
3033 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3034 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3035 d->buffer_filled -= sz;
3036
3037 if (d->buffer_filled == 0)
3038 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3039 }
3040
3041 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3042 int r;
3043
3044 assert(e);
3045 assert(d);
3046
3047 /* If there's already an event source pending for this priority, don't read another */
3048 if (d->n_pending > 0)
3049 return 0;
3050
3051 while (d->buffer_filled > 0) {
3052 size_t sz;
3053
3054 /* Let's validate that the event structures are complete */
3055 if (d->buffer_filled < offsetof(struct inotify_event, name))
3056 return -EIO;
3057
3058 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3059 if (d->buffer_filled < sz)
3060 return -EIO;
3061
3062 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3063 struct inode_data *inode_data;
3064 Iterator i;
3065
3066 /* The queue overran, let's pass this event to all event sources connected to this inotify
3067 * object */
3068
3069 HASHMAP_FOREACH(inode_data, d->inodes, i) {
3070 sd_event_source *s;
3071
3072 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3073
3074 if (s->enabled == SD_EVENT_OFF)
3075 continue;
3076
3077 r = source_set_pending(s, true);
3078 if (r < 0)
3079 return r;
3080 }
3081 }
3082 } else {
3083 struct inode_data *inode_data;
3084 sd_event_source *s;
3085
3086 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3087 * our watch descriptor table. */
3088 if (d->buffer.ev.mask & IN_IGNORED) {
3089
3090 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3091 if (!inode_data) {
3092 event_inotify_data_drop(e, d, sz);
3093 continue;
3094 }
3095
3096 /* The watch descriptor was removed by the kernel, let's drop it here too */
3097 inode_data->wd = -1;
3098 } else {
3099 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3100 if (!inode_data) {
3101 event_inotify_data_drop(e, d, sz);
3102 continue;
3103 }
3104 }
3105
3106 /* Trigger all event sources that are interested in these events. Also trigger all event
3107 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3108 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3109
3110 if (s->enabled == SD_EVENT_OFF)
3111 continue;
3112
3113 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3114 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3115 continue;
3116
3117 r = source_set_pending(s, true);
3118 if (r < 0)
3119 return r;
3120 }
3121 }
3122
3123 /* Something pending now? If so, let's finish, otherwise let's process the next buffered event. */
3124 if (d->n_pending > 0)
3125 return 1;
3126 }
3127
3128 return 0;
3129 }
3130
3131 static int process_inotify(sd_event *e) {
3132 struct inotify_data *d;
3133 int r, done = 0;
3134
3135 assert(e);
3136
3137 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3138 r = event_inotify_data_process(e, d);
3139 if (r < 0)
3140 return r;
3141 if (r > 0)
3142 done++;
3143 }
3144
3145 return done;
3146 }
3147
3148 static int source_dispatch(sd_event_source *s) {
3149 EventSourceType saved_type;
3150 int r = 0;
3151
3152 assert(s);
3153 assert(s->pending || s->type == SOURCE_EXIT);
3154
3155 /* Save the event source type here, so that we still know it after the event callback, which might
3156 * invalidate the event source. */
3157 saved_type = s->type;
3158
3159 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3160 r = source_set_pending(s, false);
3161 if (r < 0)
3162 return r;
3163 }
3164
3165 if (s->type != SOURCE_POST) {
3166 sd_event_source *z;
3167 Iterator i;
3168
3169 /* If we execute a non-post source, let's mark all
3170 * post sources as pending */
3171
3172 SET_FOREACH(z, s->event->post_sources, i) {
3173 if (z->enabled == SD_EVENT_OFF)
3174 continue;
3175
3176 r = source_set_pending(z, true);
3177 if (r < 0)
3178 return r;
3179 }
3180 }
3181
3182 if (s->enabled == SD_EVENT_ONESHOT) {
3183 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3184 if (r < 0)
3185 return r;
3186 }
3187
3188 s->dispatching = true;
3189
3190 switch (s->type) {
3191
3192 case SOURCE_IO:
3193 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3194 break;
3195
3196 case SOURCE_TIME_REALTIME:
3197 case SOURCE_TIME_BOOTTIME:
3198 case SOURCE_TIME_MONOTONIC:
3199 case SOURCE_TIME_REALTIME_ALARM:
3200 case SOURCE_TIME_BOOTTIME_ALARM:
3201 r = s->time.callback(s, s->time.next, s->userdata);
3202 break;
3203
3204 case SOURCE_SIGNAL:
3205 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3206 break;
3207
3208 case SOURCE_CHILD: {
3209 bool zombie;
3210
3211 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3212
3213 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3214
3215 /* Now, reap the PID for good. */
3216 if (zombie) {
3217 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3218 s->child.waited = true;
3219 }
3220
3221 break;
3222 }
3223
3224 case SOURCE_DEFER:
3225 r = s->defer.callback(s, s->userdata);
3226 break;
3227
3228 case SOURCE_POST:
3229 r = s->post.callback(s, s->userdata);
3230 break;
3231
3232 case SOURCE_EXIT:
3233 r = s->exit.callback(s, s->userdata);
3234 break;
3235
3236 case SOURCE_INOTIFY: {
3237 struct sd_event *e = s->event;
3238 struct inotify_data *d;
3239 size_t sz;
3240
3241 assert(s->inotify.inode_data);
3242 assert_se(d = s->inotify.inode_data->inotify_data);
3243
3244 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3245 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3246 assert(d->buffer_filled >= sz);
3247
3248 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3249
3250 /* If no event source is pending anymore on this inotify object, let's drop the event from the
3251 * buffer. */
3252 if (d->n_pending == 0)
3253 event_inotify_data_drop(e, d, sz);
3254
3255 break;
3256 }
3257
3258 case SOURCE_WATCHDOG:
3259 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3260 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3261 assert_not_reached("Wut? I shouldn't exist.");
3262 }
3263
3264 s->dispatching = false;
3265
3266 if (r < 0)
3267 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3268 strna(s->description), event_source_type_to_string(saved_type));
3269
3270 if (s->n_ref == 0)
3271 source_free(s);
3272 else if (r < 0)
3273 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3274
3275 return 1;
3276 }
3277
3278 static int event_prepare(sd_event *e) {
3279 int r;
3280
3281 assert(e);
3282
3283 for (;;) {
3284 sd_event_source *s;
3285
3286 s = prioq_peek(e->prepare);
3287 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3288 break;
3289
3290 s->prepare_iteration = e->iteration;
3291 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3292 if (r < 0)
3293 return r;
3294
3295 assert(s->prepare);
3296
3297 s->dispatching = true;
3298 r = s->prepare(s, s->userdata);
3299 s->dispatching = false;
3300
3301 if (r < 0)
3302 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3303 strna(s->description), event_source_type_to_string(s->type));
3304
3305 if (s->n_ref == 0)
3306 source_free(s);
3307 else if (r < 0)
3308 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3309 }
3310
3311 return 0;
3312 }
3313
3314 static int dispatch_exit(sd_event *e) {
3315 sd_event_source *p;
3316 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3317 int r;
3318
3319 assert(e);
3320
3321 p = prioq_peek(e->exit);
3322 if (!p || p->enabled == SD_EVENT_OFF) {
3323 e->state = SD_EVENT_FINISHED;
3324 return 0;
3325 }
3326
3327 ref = sd_event_ref(e);
3328 e->iteration++;
3329 e->state = SD_EVENT_EXITING;
3330 r = source_dispatch(p);
3331 e->state = SD_EVENT_INITIAL;
3332 return r;
3333 }
3334
3335 static sd_event_source* event_next_pending(sd_event *e) {
3336 sd_event_source *p;
3337
3338 assert(e);
3339
3340 p = prioq_peek(e->pending);
3341 if (!p)
3342 return NULL;
3343
3344 if (p->enabled == SD_EVENT_OFF)
3345 return NULL;
3346
3347 return p;
3348 }
3349
3350 static int arm_watchdog(sd_event *e) {
3351 struct itimerspec its = {};
3352 usec_t t;
3353 int r;
3354
3355 assert(e);
3356 assert(e->watchdog_fd >= 0);
3357
3358 t = sleep_between(e,
3359 e->watchdog_last + (e->watchdog_period / 2),
3360 e->watchdog_last + (e->watchdog_period * 3 / 4));
3361
3362 timespec_store(&its.it_value, t);
3363
3364 /* Make sure we never set the watchdog to 0, which tells the
3365 * kernel to disable it. */
3366 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3367 its.it_value.tv_nsec = 1;
3368
3369 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3370 if (r < 0)
3371 return -errno;
3372
3373 return 0;
3374 }
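
/* Worked example with hypothetical numbers: if sd_watchdog_enabled() reported a
 * watchdog_period of 20s, the timer above is armed to fire somewhere between
 * watchdog_last + 10s and watchdog_last + 15s, with the exact point again coalesced via
 * sleep_between(). Pinging within the second half to three quarters of the period keeps a
 * comfortable margin before the supervisor's deadline while still batching wakeups. */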
3375
3376 static int process_watchdog(sd_event *e) {
3377 assert(e);
3378
3379 if (!e->watchdog)
3380 return 0;
3381
3382 /* Don't notify the watchdog too often */
3383 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3384 return 0;
3385
3386 sd_notify(false, "WATCHDOG=1");
3387 e->watchdog_last = e->timestamp.monotonic;
3388
3389 return arm_watchdog(e);
3390 }
3391
3392 static void event_close_inode_data_fds(sd_event *e) {
3393 struct inode_data *d;
3394
3395 assert(e);
3396
3397 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3398 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3399 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3400 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3401 * compromise. */
3402
3403 while ((d = e->inode_data_to_close)) {
3404 assert(d->fd >= 0);
3405 d->fd = safe_close(d->fd);
3406
3407 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3408 }
3409 }
3410
3411 _public_ int sd_event_prepare(sd_event *e) {
3412 int r;
3413
3414 assert_return(e, -EINVAL);
3415 assert_return(e = event_resolve(e), -ENOPKG);
3416 assert_return(!event_pid_changed(e), -ECHILD);
3417 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3418 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3419
3420 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3421 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3422 * syscalls. */
3423 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3424
3425 if (e->exit_requested)
3426 goto pending;
3427
3428 e->iteration++;
3429
3430 e->state = SD_EVENT_PREPARING;
3431 r = event_prepare(e);
3432 e->state = SD_EVENT_INITIAL;
3433 if (r < 0)
3434 return r;
3435
3436 r = event_arm_timer(e, &e->realtime);
3437 if (r < 0)
3438 return r;
3439
3440 r = event_arm_timer(e, &e->boottime);
3441 if (r < 0)
3442 return r;
3443
3444 r = event_arm_timer(e, &e->monotonic);
3445 if (r < 0)
3446 return r;
3447
3448 r = event_arm_timer(e, &e->realtime_alarm);
3449 if (r < 0)
3450 return r;
3451
3452 r = event_arm_timer(e, &e->boottime_alarm);
3453 if (r < 0)
3454 return r;
3455
3456 event_close_inode_data_fds(e);
3457
3458 if (event_next_pending(e) || e->need_process_child)
3459 goto pending;
3460
3461 e->state = SD_EVENT_ARMED;
3462
3463 return 0;
3464
3465 pending:
3466 e->state = SD_EVENT_ARMED;
3467 r = sd_event_wait(e, 0);
3468 if (r == 0)
3469 e->state = SD_EVENT_ARMED;
3470
3471 return r;
3472 }
3473
3474 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3475 size_t event_queue_max;
3476 int r, m, i;
3477
3478 assert_return(e, -EINVAL);
3479 assert_return(e = event_resolve(e), -ENOPKG);
3480 assert_return(!event_pid_changed(e), -ECHILD);
3481 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3482 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3483
3484 if (e->exit_requested) {
3485 e->state = SD_EVENT_PENDING;
3486 return 1;
3487 }
3488
3489 event_queue_max = MAX(e->n_sources, 1u);
3490 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3491 return -ENOMEM;
3492
3493 /* If we still have inotify data buffered, then query the other fds, but don't block waiting for them */
3494 if (e->inotify_data_buffered)
3495 timeout = 0;
3496
3497 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
3498 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3499 if (m < 0) {
3500 if (errno == EINTR) {
3501 e->state = SD_EVENT_PENDING;
3502 return 1;
3503 }
3504
3505 r = -errno;
3506 goto finish;
3507 }
3508
3509 triple_timestamp_get(&e->timestamp);
3510
3511 for (i = 0; i < m; i++) {
3512
3513 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3514 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3515 else {
3516 WakeupType *t = e->event_queue[i].data.ptr;
3517
3518 switch (*t) {
3519
3520 case WAKEUP_EVENT_SOURCE: {
3521 sd_event_source *s = e->event_queue[i].data.ptr;
3522
3523 assert(s);
3524
3525 switch (s->type) {
3526
3527 case SOURCE_IO:
3528 r = process_io(e, s, e->event_queue[i].events);
3529 break;
3530
3531 case SOURCE_CHILD:
3532 r = process_pidfd(e, s, e->event_queue[i].events);
3533 break;
3534
3535 default:
3536 assert_not_reached("Unexpected event source type");
3537 }
3538
3539 break;
3540 }
3541
3542 case WAKEUP_CLOCK_DATA: {
3543 struct clock_data *d = e->event_queue[i].data.ptr;
3544
3545 assert(d);
3546
3547 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3548 break;
3549 }
3550
3551 case WAKEUP_SIGNAL_DATA:
3552 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3553 break;
3554
3555 case WAKEUP_INOTIFY_DATA:
3556 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3557 break;
3558
3559 default:
3560 assert_not_reached("Invalid wake-up pointer");
3561 }
3562 }
3563 if (r < 0)
3564 goto finish;
3565 }
3566
3567 r = process_watchdog(e);
3568 if (r < 0)
3569 goto finish;
3570
3571 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3572 if (r < 0)
3573 goto finish;
3574
3575 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3576 if (r < 0)
3577 goto finish;
3578
3579 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3580 if (r < 0)
3581 goto finish;
3582
3583 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3584 if (r < 0)
3585 goto finish;
3586
3587 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3588 if (r < 0)
3589 goto finish;
3590
3591 if (e->need_process_child) {
3592 r = process_child(e);
3593 if (r < 0)
3594 goto finish;
3595 }
3596
3597 r = process_inotify(e);
3598 if (r < 0)
3599 goto finish;
3600
3601 if (event_next_pending(e)) {
3602 e->state = SD_EVENT_PENDING;
3603
3604 return 1;
3605 }
3606
3607 r = 0;
3608
3609 finish:
3610 e->state = SD_EVENT_INITIAL;
3611
3612 return r;
3613 }
3614
3615 _public_ int sd_event_dispatch(sd_event *e) {
3616 sd_event_source *p;
3617 int r;
3618
3619 assert_return(e, -EINVAL);
3620 assert_return(e = event_resolve(e), -ENOPKG);
3621 assert_return(!event_pid_changed(e), -ECHILD);
3622 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3623 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3624
3625 if (e->exit_requested)
3626 return dispatch_exit(e);
3627
3628 p = event_next_pending(e);
3629 if (p) {
3630 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3631
3632 ref = sd_event_ref(e);
3633 e->state = SD_EVENT_RUNNING;
3634 r = source_dispatch(p);
3635 e->state = SD_EVENT_INITIAL;
3636 return r;
3637 }
3638
3639 e->state = SD_EVENT_INITIAL;
3640
3641 return 1;
3642 }
3643
3644 static void event_log_delays(sd_event *e) {
3645 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3646 size_t l, i;
3647
3648 p = b;
3649 l = sizeof(b);
3650 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3651 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3652 e->delays[i] = 0;
3653 }
3654 log_debug("Event loop iterations: %s", b);
3655 }
3656
3657 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3658 int r;
3659
3660 assert_return(e, -EINVAL);
3661 assert_return(e = event_resolve(e), -ENOPKG);
3662 assert_return(!event_pid_changed(e), -ECHILD);
3663 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3664 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3665
3666 if (e->profile_delays && e->last_run) {
3667 usec_t this_run;
3668 unsigned l;
3669
3670 this_run = now(CLOCK_MONOTONIC);
3671
3672 l = u64log2(this_run - e->last_run);
3673 assert(l < sizeof(e->delays));
3674 e->delays[l]++;
3675
3676 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3677 event_log_delays(e);
3678 e->last_log = this_run;
3679 }
3680 }
3681
3682 r = sd_event_prepare(e);
3683 if (r == 0)
3684 /* There was nothing? Then wait... */
3685 r = sd_event_wait(e, timeout);
3686
3687 if (e->profile_delays)
3688 e->last_run = now(CLOCK_MONOTONIC);
3689
3690 if (r > 0) {
3691 /* There's something now, so let's dispatch it */
3692 r = sd_event_dispatch(e);
3693 if (r < 0)
3694 return r;
3695
3696 return 1;
3697 }
3698
3699 return r;
3700 }
3701
3702 _public_ int sd_event_loop(sd_event *e) {
3703 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3704 int r;
3705
3706 assert_return(e, -EINVAL);
3707 assert_return(e = event_resolve(e), -ENOPKG);
3708 assert_return(!event_pid_changed(e), -ECHILD);
3709 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3710
3711 ref = sd_event_ref(e);
3712
3713 while (e->state != SD_EVENT_FINISHED) {
3714 r = sd_event_run(e, (uint64_t) -1);
3715 if (r < 0)
3716 return r;
3717 }
3718
3719 return e->exit_code;
3720 }
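
/* Illustrative usage sketch for driving the loop (not part of the library); the handler
 * on_sigterm is hypothetical and error handling is abbreviated:
 *
 *     sd_event *event = NULL;
 *
 *     assert_se(sd_event_default(&event) >= 0);
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
 *     assert_se(sd_event_add_signal(event, NULL, SIGTERM, on_sigterm, NULL) >= 0);
 *
 *     // Iterates prepare/wait/dispatch until somebody calls sd_event_exit()
 *     r = sd_event_loop(event);
 *
 *     event = sd_event_unref(event);
 */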
3721
3722 _public_ int sd_event_get_fd(sd_event *e) {
3723
3724 assert_return(e, -EINVAL);
3725 assert_return(e = event_resolve(e), -ENOPKG);
3726 assert_return(!event_pid_changed(e), -ECHILD);
3727
3728 return e->epoll_fd;
3729 }
3730
3731 _public_ int sd_event_get_state(sd_event *e) {
3732 assert_return(e, -EINVAL);
3733 assert_return(e = event_resolve(e), -ENOPKG);
3734 assert_return(!event_pid_changed(e), -ECHILD);
3735
3736 return e->state;
3737 }
3738
3739 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3740 assert_return(e, -EINVAL);
3741 assert_return(e = event_resolve(e), -ENOPKG);
3742 assert_return(code, -EINVAL);
3743 assert_return(!event_pid_changed(e), -ECHILD);
3744
3745 if (!e->exit_requested)
3746 return -ENODATA;
3747
3748 *code = e->exit_code;
3749 return 0;
3750 }
3751
3752 _public_ int sd_event_exit(sd_event *e, int code) {
3753 assert_return(e, -EINVAL);
3754 assert_return(e = event_resolve(e), -ENOPKG);
3755 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3756 assert_return(!event_pid_changed(e), -ECHILD);
3757
3758 e->exit_requested = true;
3759 e->exit_code = code;
3760
3761 return 0;
3762 }
3763
3764 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3765 assert_return(e, -EINVAL);
3766 assert_return(e = event_resolve(e), -ENOPKG);
3767 assert_return(usec, -EINVAL);
3768 assert_return(!event_pid_changed(e), -ECHILD);
3769
3770 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3771 return -EOPNOTSUPP;
3772
3773 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
3774 * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
3775 * but for the purpose of getting the time this doesn't matter. */
3776 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3777 return -EOPNOTSUPP;
3778
3779 if (!triple_timestamp_is_set(&e->timestamp)) {
3780 /* Implicitly fall back to now() if we never ran
3781 * before and thus have no cached time. */
3782 *usec = now(clock);
3783 return 1;
3784 }
3785
3786 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3787 return 0;
3788 }
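
/* Illustrative usage sketch for sd_event_now() (not part of the library): inside an event
 * handler the cached timestamp of the current iteration is usually what callers want, e.g.
 * to re-arm a relative timer without an extra clock_gettime(); the variable names are
 * hypothetical:
 *
 *     uint64_t now_usec;
 *
 *     assert_se(sd_event_now(sd_event_source_get_event(timer_source), CLOCK_MONOTONIC, &now_usec) >= 0);
 *     assert_se(sd_event_source_set_time(timer_source, now_usec + 5 * USEC_PER_SEC) >= 0);
 */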
3789
3790 _public_ int sd_event_default(sd_event **ret) {
3791 sd_event *e = NULL;
3792 int r;
3793
3794 if (!ret)
3795 return !!default_event;
3796
3797 if (default_event) {
3798 *ret = sd_event_ref(default_event);
3799 return 0;
3800 }
3801
3802 r = sd_event_new(&e);
3803 if (r < 0)
3804 return r;
3805
3806 e->default_event_ptr = &default_event;
3807 e->tid = gettid();
3808 default_event = e;
3809
3810 *ret = e;
3811 return 1;
3812 }
3813
3814 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3815 assert_return(e, -EINVAL);
3816 assert_return(e = event_resolve(e), -ENOPKG);
3817 assert_return(tid, -EINVAL);
3818 assert_return(!event_pid_changed(e), -ECHILD);
3819
3820 if (e->tid != 0) {
3821 *tid = e->tid;
3822 return 0;
3823 }
3824
3825 return -ENXIO;
3826 }
3827
3828 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3829 int r;
3830
3831 assert_return(e, -EINVAL);
3832 assert_return(e = event_resolve(e), -ENOPKG);
3833 assert_return(!event_pid_changed(e), -ECHILD);
3834
3835 if (e->watchdog == !!b)
3836 return e->watchdog;
3837
3838 if (b) {
3839 r = sd_watchdog_enabled(false, &e->watchdog_period);
3840 if (r <= 0)
3841 return r;
3842
3843 /* Issue first ping immediately */
3844 sd_notify(false, "WATCHDOG=1");
3845 e->watchdog_last = now(CLOCK_MONOTONIC);
3846
3847 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3848 if (e->watchdog_fd < 0)
3849 return -errno;
3850
3851 r = arm_watchdog(e);
3852 if (r < 0)
3853 goto fail;
3854
3855 struct epoll_event ev = {
3856 .events = EPOLLIN,
3857 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3858 };
3859
3860 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3861 if (r < 0) {
3862 r = -errno;
3863 goto fail;
3864 }
3865
3866 } else {
3867 if (e->watchdog_fd >= 0) {
3868 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3869 e->watchdog_fd = safe_close(e->watchdog_fd);
3870 }
3871 }
3872
3873 e->watchdog = !!b;
3874 return e->watchdog;
3875
3876 fail:
3877 e->watchdog_fd = safe_close(e->watchdog_fd);
3878 return r;
3879 }
3880
3881 _public_ int sd_event_get_watchdog(sd_event *e) {
3882 assert_return(e, -EINVAL);
3883 assert_return(e = event_resolve(e), -ENOPKG);
3884 assert_return(!event_pid_changed(e), -ECHILD);
3885
3886 return e->watchdog;
3887 }
3888
3889 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3890 assert_return(e, -EINVAL);
3891 assert_return(e = event_resolve(e), -ENOPKG);
3892 assert_return(!event_pid_changed(e), -ECHILD);
3893
3894 *ret = e->iteration;
3895 return 0;
3896 }
3897
3898 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3899 assert_return(s, -EINVAL);
3900
3901 s->destroy_callback = callback;
3902 return 0;
3903 }
3904
3905 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3906 assert_return(s, -EINVAL);
3907
3908 if (ret)
3909 *ret = s->destroy_callback;
3910
3911 return !!s->destroy_callback;
3912 }
3913
3914 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3915 assert_return(s, -EINVAL);
3916
3917 return s->floating;
3918 }
3919
3920 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3921 assert_return(s, -EINVAL);
3922
3923 if (s->floating == !!b)
3924 return 0;
3925
3926 if (!s->event) /* Already disconnected */
3927 return -ESTALE;
3928
3929 s->floating = b;
3930
3931 if (b) {
3932 sd_event_source_ref(s);
3933 sd_event_unref(s->event);
3934 } else {
3935 sd_event_ref(s->event);
3936 sd_event_source_unref(s);
3937 }
3938
3939 return 1;
3940 }