src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN on a pidfd */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
39
40 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43 [SOURCE_TIME_BOOTTIME] = "boottime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
53 [SOURCE_INOTIFY] = "inotify",
54 };
55
56 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
60 struct sd_event {
61 unsigned n_ref;
62
63 int epoll_fd;
64 int watchdog_fd;
65
66 Prioq *pending;
67 Prioq *prepare;
68
69 /* timerfd_create() only supports these five clocks so far. We
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
73 struct clock_data boottime;
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
77
78 usec_t perturb;
79
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
82
83 Hashmap *child_sources;
84 unsigned n_enabled_child_sources;
85
86 Set *post_sources;
87
88 Prioq *exit;
89
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
98 pid_t original_pid;
99
100 uint64_t iteration;
101 triple_timestamp timestamp;
102 int state;
103
104 bool exit_requested:1;
105 bool need_process_child:1;
106 bool watchdog:1;
107 bool profile_delays:1;
108
109 int exit_code;
110
111 pid_t tid;
112 sd_event **default_event_ptr;
113
114 usec_t watchdog_last, watchdog_period;
115
116 unsigned n_sources;
117
118 LIST_HEAD(sd_event_source, sources);
119
120 usec_t last_run, last_log;
121 unsigned delays[sizeof(usec_t) * 8];
122 };
123
124 static thread_local sd_event *default_event = NULL;
125
126 static void source_disconnect(sd_event_source *s);
127 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
128
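/* Maps SD_EVENT_DEFAULT to the calling thread's default event loop (NULL if none has been
 * allocated yet); any other pointer is passed through unchanged. */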
129 static sd_event *event_resolve(sd_event *e) {
130 return e == SD_EVENT_DEFAULT ? default_event : e;
131 }
132
133 static int pending_prioq_compare(const void *a, const void *b) {
134 const sd_event_source *x = a, *y = b;
135 int r;
136
137 assert(x->pending);
138 assert(y->pending);
139
140 /* Enabled ones first */
141 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
142 return -1;
143 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
144 return 1;
145
146 /* Lower priority values first */
147 r = CMP(x->priority, y->priority);
148 if (r != 0)
149 return r;
150
151 /* Older entries first */
152 return CMP(x->pending_iteration, y->pending_iteration);
153 }
154
155 static int prepare_prioq_compare(const void *a, const void *b) {
156 const sd_event_source *x = a, *y = b;
157 int r;
158
159 assert(x->prepare);
160 assert(y->prepare);
161
162 /* Enabled ones first */
163 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
164 return -1;
165 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
166 return 1;
167
168 /* Move most recently prepared ones last, so that we can stop
169 * preparing as soon as we hit one that has already been
170 * prepared in the current iteration */
171 r = CMP(x->prepare_iteration, y->prepare_iteration);
172 if (r != 0)
173 return r;
174
175 /* Lower priority values first */
176 return CMP(x->priority, y->priority);
177 }
178
179 static int earliest_time_prioq_compare(const void *a, const void *b) {
180 const sd_event_source *x = a, *y = b;
181
182 assert(EVENT_SOURCE_IS_TIME(x->type));
183 assert(x->type == y->type);
184
185 /* Enabled ones first */
186 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
187 return -1;
188 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
189 return 1;
190
191 /* Move the pending ones to the end */
192 if (!x->pending && y->pending)
193 return -1;
194 if (x->pending && !y->pending)
195 return 1;
196
197 /* Order by time */
198 return CMP(x->time.next, y->time.next);
199 }
200
201 static usec_t time_event_source_latest(const sd_event_source *s) {
202 return usec_add(s->time.next, s->time.accuracy);
203 }
204
205 static int latest_time_prioq_compare(const void *a, const void *b) {
206 const sd_event_source *x = a, *y = b;
207
208 assert(EVENT_SOURCE_IS_TIME(x->type));
209 assert(x->type == y->type);
210
211 /* Enabled ones first */
212 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
213 return -1;
214 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
215 return 1;
216
217 /* Move the pending ones to the end */
218 if (!x->pending && y->pending)
219 return -1;
220 if (x->pending && !y->pending)
221 return 1;
222
223 /* Order by time */
224 return CMP(time_event_source_latest(x), time_event_source_latest(y));
225 }
226
227 static int exit_prioq_compare(const void *a, const void *b) {
228 const sd_event_source *x = a, *y = b;
229
230 assert(x->type == SOURCE_EXIT);
231 assert(y->type == SOURCE_EXIT);
232
233 /* Enabled ones first */
234 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
235 return -1;
236 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
237 return 1;
238
239 /* Lower priority values first */
240 return CMP(x->priority, y->priority);
241 }
242
243 static void free_clock_data(struct clock_data *d) {
244 assert(d);
245 assert(d->wakeup == WAKEUP_CLOCK_DATA);
246
247 safe_close(d->fd);
248 prioq_free(d->earliest);
249 prioq_free(d->latest);
250 }
251
252 static sd_event *event_free(sd_event *e) {
253 sd_event_source *s;
254
255 assert(e);
256
257 while ((s = e->sources)) {
258 assert(s->floating);
259 source_disconnect(s);
260 sd_event_source_unref(s);
261 }
262
263 assert(e->n_sources == 0);
264
265 if (e->default_event_ptr)
266 *(e->default_event_ptr) = NULL;
267
268 safe_close(e->epoll_fd);
269 safe_close(e->watchdog_fd);
270
271 free_clock_data(&e->realtime);
272 free_clock_data(&e->boottime);
273 free_clock_data(&e->monotonic);
274 free_clock_data(&e->realtime_alarm);
275 free_clock_data(&e->boottime_alarm);
276
277 prioq_free(e->pending);
278 prioq_free(e->prepare);
279 prioq_free(e->exit);
280
281 free(e->signal_sources);
282 hashmap_free(e->signal_data);
283
284 hashmap_free(e->inotify_data);
285
286 hashmap_free(e->child_sources);
287 set_free(e->post_sources);
288
289 return mfree(e);
290 }
291
292 _public_ int sd_event_new(sd_event** ret) {
293 sd_event *e;
294 int r;
295
296 assert_return(ret, -EINVAL);
297
298 e = new(sd_event, 1);
299 if (!e)
300 return -ENOMEM;
301
302 *e = (sd_event) {
303 .n_ref = 1,
304 .epoll_fd = -1,
305 .watchdog_fd = -1,
306 .realtime.wakeup = WAKEUP_CLOCK_DATA,
307 .realtime.fd = -1,
308 .realtime.next = USEC_INFINITY,
309 .boottime.wakeup = WAKEUP_CLOCK_DATA,
310 .boottime.fd = -1,
311 .boottime.next = USEC_INFINITY,
312 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
313 .monotonic.fd = -1,
314 .monotonic.next = USEC_INFINITY,
315 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
316 .realtime_alarm.fd = -1,
317 .realtime_alarm.next = USEC_INFINITY,
318 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
319 .boottime_alarm.fd = -1,
320 .boottime_alarm.next = USEC_INFINITY,
321 .perturb = USEC_INFINITY,
322 .original_pid = getpid_cached(),
323 };
324
325 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
326 if (r < 0)
327 goto fail;
328
329 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
330 if (e->epoll_fd < 0) {
331 r = -errno;
332 goto fail;
333 }
334
335 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
336
337 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
338 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
339 e->profile_delays = true;
340 }
341
342 *ret = e;
343 return 0;
344
345 fail:
346 event_free(e);
347 return r;
348 }
349
350 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
351
352 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
353 if (s)
354 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
355 return sd_event_source_unref(s);
356 }
357
358 static bool event_pid_changed(sd_event *e) {
359 assert(e);
360
361 /* We don't support people creating an event loop and keeping
362 * it around over a fork(). Let's complain. */
363
364 return e->original_pid != getpid_cached();
365 }
366
367 static void source_io_unregister(sd_event_source *s) {
368 assert(s);
369 assert(s->type == SOURCE_IO);
370
371 if (event_pid_changed(s->event))
372 return;
373
374 if (!s->io.registered)
375 return;
376
377 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
378 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
379 strna(s->description), event_source_type_to_string(s->type));
380
381 s->io.registered = false;
382 }
383
384 static int source_io_register(
385 sd_event_source *s,
386 int enabled,
387 uint32_t events) {
388
389 struct epoll_event ev;
390 int r;
391
392 assert(s);
393 assert(s->type == SOURCE_IO);
394 assert(enabled != SD_EVENT_OFF);
395
396 ev = (struct epoll_event) {
397 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
398 .data.ptr = s,
399 };
400
401 if (s->io.registered)
402 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
403 else
404 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
405 if (r < 0)
406 return -errno;
407
408 s->io.registered = true;
409
410 return 0;
411 }
412
413 static void source_child_pidfd_unregister(sd_event_source *s) {
414 assert(s);
415 assert(s->type == SOURCE_CHILD);
416
417 if (event_pid_changed(s->event))
418 return;
419
420 if (!s->child.registered)
421 return;
422
423 if (EVENT_SOURCE_WATCH_PIDFD(s))
424 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
425 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
426 strna(s->description), event_source_type_to_string(s->type));
427
428 s->child.registered = false;
429 }
430
431 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
432 int r;
433
434 assert(s);
435 assert(s->type == SOURCE_CHILD);
436 assert(enabled != SD_EVENT_OFF);
437
438 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
439 struct epoll_event ev;
440
441 ev = (struct epoll_event) {
442 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
443 .data.ptr = s,
444 };
445
446 if (s->child.registered)
447 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
448 else
449 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
450 if (r < 0)
451 return -errno;
452 }
453
454 s->child.registered = true;
455 return 0;
456 }
457
458 static clockid_t event_source_type_to_clock(EventSourceType t) {
459
460 switch (t) {
461
462 case SOURCE_TIME_REALTIME:
463 return CLOCK_REALTIME;
464
465 case SOURCE_TIME_BOOTTIME:
466 return CLOCK_BOOTTIME;
467
468 case SOURCE_TIME_MONOTONIC:
469 return CLOCK_MONOTONIC;
470
471 case SOURCE_TIME_REALTIME_ALARM:
472 return CLOCK_REALTIME_ALARM;
473
474 case SOURCE_TIME_BOOTTIME_ALARM:
475 return CLOCK_BOOTTIME_ALARM;
476
477 default:
478 return (clockid_t) -1;
479 }
480 }
481
482 static EventSourceType clock_to_event_source_type(clockid_t clock) {
483
484 switch (clock) {
485
486 case CLOCK_REALTIME:
487 return SOURCE_TIME_REALTIME;
488
489 case CLOCK_BOOTTIME:
490 return SOURCE_TIME_BOOTTIME;
491
492 case CLOCK_MONOTONIC:
493 return SOURCE_TIME_MONOTONIC;
494
495 case CLOCK_REALTIME_ALARM:
496 return SOURCE_TIME_REALTIME_ALARM;
497
498 case CLOCK_BOOTTIME_ALARM:
499 return SOURCE_TIME_BOOTTIME_ALARM;
500
501 default:
502 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
503 }
504 }
505
506 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
507 assert(e);
508
509 switch (t) {
510
511 case SOURCE_TIME_REALTIME:
512 return &e->realtime;
513
514 case SOURCE_TIME_BOOTTIME:
515 return &e->boottime;
516
517 case SOURCE_TIME_MONOTONIC:
518 return &e->monotonic;
519
520 case SOURCE_TIME_REALTIME_ALARM:
521 return &e->realtime_alarm;
522
523 case SOURCE_TIME_BOOTTIME_ALARM:
524 return &e->boottime_alarm;
525
526 default:
527 return NULL;
528 }
529 }
530
531 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
532 assert(e);
533
534 if (!d)
535 return;
536
537 hashmap_remove(e->signal_data, &d->priority);
538 safe_close(d->fd);
539 free(d);
540 }
541
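/* Finds or allocates the signal_data object for the priority used by the signal's event source,
 * adds the signal to that object's signalfd mask, and registers a freshly created signalfd with
 * epoll. Returns the object via *ret if requested. */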
542 static int event_make_signal_data(
543 sd_event *e,
544 int sig,
545 struct signal_data **ret) {
546
547 struct epoll_event ev;
548 struct signal_data *d;
549 bool added = false;
550 sigset_t ss_copy;
551 int64_t priority;
552 int r;
553
554 assert(e);
555
556 if (event_pid_changed(e))
557 return -ECHILD;
558
559 if (e->signal_sources && e->signal_sources[sig])
560 priority = e->signal_sources[sig]->priority;
561 else
562 priority = SD_EVENT_PRIORITY_NORMAL;
563
564 d = hashmap_get(e->signal_data, &priority);
565 if (d) {
566 if (sigismember(&d->sigset, sig) > 0) {
567 if (ret)
568 *ret = d;
569 return 0;
570 }
571 } else {
572 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
573 if (r < 0)
574 return r;
575
576 d = new(struct signal_data, 1);
577 if (!d)
578 return -ENOMEM;
579
580 *d = (struct signal_data) {
581 .wakeup = WAKEUP_SIGNAL_DATA,
582 .fd = -1,
583 .priority = priority,
584 };
585
586 r = hashmap_put(e->signal_data, &d->priority, d);
587 if (r < 0) {
588 free(d);
589 return r;
590 }
591
592 added = true;
593 }
594
595 ss_copy = d->sigset;
596 assert_se(sigaddset(&ss_copy, sig) >= 0);
597
598 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
599 if (r < 0) {
600 r = -errno;
601 goto fail;
602 }
603
604 d->sigset = ss_copy;
605
606 if (d->fd >= 0) {
607 if (ret)
608 *ret = d;
609 return 0;
610 }
611
612 d->fd = fd_move_above_stdio(r);
613
614 ev = (struct epoll_event) {
615 .events = EPOLLIN,
616 .data.ptr = d,
617 };
618
619 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
620 if (r < 0) {
621 r = -errno;
622 goto fail;
623 }
624
625 if (ret)
626 *ret = d;
627
628 return 0;
629
630 fail:
631 if (added)
632 event_free_signal_data(e, d);
633
634 return r;
635 }
636
637 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
638 assert(e);
639 assert(d);
640
641 /* Turns off the specified signal in the signal data
642 * object. If the signal mask of the object becomes empty that
643 * way, the object is removed altogether. */
644
645 if (sigismember(&d->sigset, sig) == 0)
646 return;
647
648 assert_se(sigdelset(&d->sigset, sig) >= 0);
649
650 if (sigisemptyset(&d->sigset)) {
651 /* If the mask is now all-zero we can get rid of the structure */
652 event_free_signal_data(e, d);
653 return;
654 }
655
656 assert(d->fd >= 0);
657
658 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
659 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
660 }
661
662 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
663 struct signal_data *d;
664 static const int64_t zero_priority = 0;
665
666 assert(e);
667
668 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
669 * and possibly drop the signalfd for it. */
670
671 if (sig == SIGCHLD &&
672 e->n_enabled_child_sources > 0)
673 return;
674
675 if (e->signal_sources &&
676 e->signal_sources[sig] &&
677 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
678 return;
679
680 /*
681 * The specified signal might be enabled in three different queues:
682 *
683 * 1) the one that belongs to the priority passed (if it is non-NULL)
684 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
685 * 3) the 0 priority (to cover the SIGCHLD case)
686 *
687 * Hence, let's remove it from all three here.
688 */
689
690 if (priority) {
691 d = hashmap_get(e->signal_data, priority);
692 if (d)
693 event_unmask_signal_data(e, d, sig);
694 }
695
696 if (e->signal_sources && e->signal_sources[sig]) {
697 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
698 if (d)
699 event_unmask_signal_data(e, d, sig);
700 }
701
702 d = hashmap_get(e->signal_data, &zero_priority);
703 if (d)
704 event_unmask_signal_data(e, d, sig);
705 }
706
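/* Detaches an event source from its event loop: deregisters it from epoll, signalfd or inotify as
 * appropriate for its type, removes it from the pending and prepare queues and from the loop's
 * source list, and drops the loop reference held by non-floating sources. The object itself stays
 * allocated; source_free() takes care of that. */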
707 static void source_disconnect(sd_event_source *s) {
708 sd_event *event;
709
710 assert(s);
711
712 if (!s->event)
713 return;
714
715 assert(s->event->n_sources > 0);
716
717 switch (s->type) {
718
719 case SOURCE_IO:
720 if (s->io.fd >= 0)
721 source_io_unregister(s);
722
723 break;
724
725 case SOURCE_TIME_REALTIME:
726 case SOURCE_TIME_BOOTTIME:
727 case SOURCE_TIME_MONOTONIC:
728 case SOURCE_TIME_REALTIME_ALARM:
729 case SOURCE_TIME_BOOTTIME_ALARM: {
730 struct clock_data *d;
731
732 d = event_get_clock_data(s->event, s->type);
733 assert(d);
734
735 prioq_remove(d->earliest, s, &s->time.earliest_index);
736 prioq_remove(d->latest, s, &s->time.latest_index);
737 d->needs_rearm = true;
738 break;
739 }
740
741 case SOURCE_SIGNAL:
742 if (s->signal.sig > 0) {
743
744 if (s->event->signal_sources)
745 s->event->signal_sources[s->signal.sig] = NULL;
746
747 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
748 }
749
750 break;
751
752 case SOURCE_CHILD:
753 if (s->child.pid > 0) {
754 if (s->enabled != SD_EVENT_OFF) {
755 assert(s->event->n_enabled_child_sources > 0);
756 s->event->n_enabled_child_sources--;
757 }
758
759 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
760 }
761
762 if (EVENT_SOURCE_WATCH_PIDFD(s))
763 source_child_pidfd_unregister(s);
764 else
765 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
766
767 break;
768
769 case SOURCE_DEFER:
770 /* nothing */
771 break;
772
773 case SOURCE_POST:
774 set_remove(s->event->post_sources, s);
775 break;
776
777 case SOURCE_EXIT:
778 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
779 break;
780
781 case SOURCE_INOTIFY: {
782 struct inode_data *inode_data;
783
784 inode_data = s->inotify.inode_data;
785 if (inode_data) {
786 struct inotify_data *inotify_data;
787 assert_se(inotify_data = inode_data->inotify_data);
788
789 /* Detach this event source from the inode object */
790 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
791 s->inotify.inode_data = NULL;
792
793 if (s->pending) {
794 assert(inotify_data->n_pending > 0);
795 inotify_data->n_pending--;
796 }
797
798 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
799 * continues to be watched. That's because inotify doesn't really have an API for that: we
800 * can only change watch masks with access to the original inode either by fd or by path. But
801 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
802 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
803 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
804 * there), but given the need for open_by_handle_at() which is privileged and not universally
805 * available this would be quite an incomplete solution. Hence we go the other way, leave the
806 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
807 * anymore after reception. Yes, this sucks, but … Linux … */
808
809 /* Maybe release the inode data (and its inotify) */
810 event_gc_inode_data(s->event, inode_data);
811 }
812
813 break;
814 }
815
816 default:
817 assert_not_reached("Wut? I shouldn't exist.");
818 }
819
820 if (s->pending)
821 prioq_remove(s->event->pending, s, &s->pending_index);
822
823 if (s->prepare)
824 prioq_remove(s->event->prepare, s, &s->prepare_index);
825
826 event = s->event;
827
828 s->event = NULL;
829 LIST_REMOVE(sources, event->sources, s);
830 event->n_sources--;
831
832 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
833 * pidfd associated with this event source, which we'll do only on source_free(). */
834
835 if (!s->floating)
836 sd_event_unref(event);
837 }
838
839 static void source_free(sd_event_source *s) {
840 assert(s);
841
842 source_disconnect(s);
843
844 if (s->type == SOURCE_IO && s->io.owned)
845 s->io.fd = safe_close(s->io.fd);
846
847 if (s->type == SOURCE_CHILD) {
848 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
849
850 if (s->child.process_owned) {
851
852 if (!s->child.exited) {
853 bool sent = false;
854
855 if (s->child.pidfd >= 0) {
856 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
857 if (errno == ESRCH) /* Already dead */
858 sent = true;
859 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
860 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
861 s->child.pid);
862 } else
863 sent = true;
864 }
865
866 if (!sent)
867 if (kill(s->child.pid, SIGKILL) < 0)
868 if (errno != ESRCH) /* Already dead */
869 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
870 s->child.pid);
871 }
872
873 if (!s->child.waited) {
874 siginfo_t si = {};
875
876 /* Reap the child if we can */
877 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
878 }
879 }
880
881 if (s->child.pidfd_owned)
882 s->child.pidfd = safe_close(s->child.pidfd);
883 }
884
885 if (s->destroy_callback)
886 s->destroy_callback(s->userdata);
887
888 free(s->description);
889 free(s);
890 }
891 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
892
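/* Marks an event source as pending or no longer pending, adding it to or removing it from the
 * loop's pending priority queue, and keeps the per-clock, signal and inotify bookkeeping in sync. */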
893 static int source_set_pending(sd_event_source *s, bool b) {
894 int r;
895
896 assert(s);
897 assert(s->type != SOURCE_EXIT);
898
899 if (s->pending == b)
900 return 0;
901
902 s->pending = b;
903
904 if (b) {
905 s->pending_iteration = s->event->iteration;
906
907 r = prioq_put(s->event->pending, s, &s->pending_index);
908 if (r < 0) {
909 s->pending = false;
910 return r;
911 }
912 } else
913 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
914
915 if (EVENT_SOURCE_IS_TIME(s->type)) {
916 struct clock_data *d;
917
918 d = event_get_clock_data(s->event, s->type);
919 assert(d);
920
921 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
922 prioq_reshuffle(d->latest, s, &s->time.latest_index);
923 d->needs_rearm = true;
924 }
925
926 if (s->type == SOURCE_SIGNAL && !b) {
927 struct signal_data *d;
928
929 d = hashmap_get(s->event->signal_data, &s->priority);
930 if (d && d->current == s)
931 d->current = NULL;
932 }
933
934 if (s->type == SOURCE_INOTIFY) {
935
936 assert(s->inotify.inode_data);
937 assert(s->inotify.inode_data->inotify_data);
938
939 if (b)
940 s->inotify.inode_data->inotify_data->n_pending ++;
941 else {
942 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
943 s->inotify.inode_data->inotify_data->n_pending --;
944 }
945 }
946
947 return 0;
948 }
949
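/* Allocates a new event source of the specified type and links it into the event loop. Non-floating
 * sources pin the loop with a reference of their own; floating sources don't, they are owned by the
 * loop and cleaned up when it is freed. */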
950 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
951 sd_event_source *s;
952
953 assert(e);
954
955 s = new(sd_event_source, 1);
956 if (!s)
957 return NULL;
958
959 *s = (struct sd_event_source) {
960 .n_ref = 1,
961 .event = e,
962 .floating = floating,
963 .type = type,
964 .pending_index = PRIOQ_IDX_NULL,
965 .prepare_index = PRIOQ_IDX_NULL,
966 };
967
968 if (!floating)
969 sd_event_ref(e);
970
971 LIST_PREPEND(sources, e->sources, s);
972 e->n_sources++;
973
974 return s;
975 }
976
977 _public_ int sd_event_add_io(
978 sd_event *e,
979 sd_event_source **ret,
980 int fd,
981 uint32_t events,
982 sd_event_io_handler_t callback,
983 void *userdata) {
984
985 _cleanup_(source_freep) sd_event_source *s = NULL;
986 int r;
987
988 assert_return(e, -EINVAL);
989 assert_return(e = event_resolve(e), -ENOPKG);
990 assert_return(fd >= 0, -EBADF);
991 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
992 assert_return(callback, -EINVAL);
993 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
994 assert_return(!event_pid_changed(e), -ECHILD);
995
996 s = source_new(e, !ret, SOURCE_IO);
997 if (!s)
998 return -ENOMEM;
999
1000 s->wakeup = WAKEUP_EVENT_SOURCE;
1001 s->io.fd = fd;
1002 s->io.events = events;
1003 s->io.callback = callback;
1004 s->userdata = userdata;
1005 s->enabled = SD_EVENT_ON;
1006
1007 r = source_io_register(s, s->enabled, events);
1008 if (r < 0)
1009 return r;
1010
1011 if (ret)
1012 *ret = s;
1013 TAKE_PTR(s);
1014
1015 return 0;
1016 }
1017
1018 static void initialize_perturb(sd_event *e) {
1019 sd_id128_t bootid = {};
1020
1021 /* When we sleep for longer, we try to realign the wakeup to
1022 the same time within each minute/second/250ms, so that
1023 events all across the system can be coalesced into a single
1024 CPU wakeup. However, let's take some system-specific
1025 randomness for this value, so that in a network of systems
1026 with synced clocks timer events are distributed a
1027 bit. Here, we calculate a perturbation usec offset from the
1028 boot ID. */
1029
1030 if (_likely_(e->perturb != USEC_INFINITY))
1031 return;
1032
1033 if (sd_id128_get_boot(&bootid) >= 0)
1034 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1035 }
1036
1037 static int event_setup_timer_fd(
1038 sd_event *e,
1039 struct clock_data *d,
1040 clockid_t clock) {
1041
1042 struct epoll_event ev;
1043 int r, fd;
1044
1045 assert(e);
1046 assert(d);
1047
1048 if (_likely_(d->fd >= 0))
1049 return 0;
1050
1051 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1052 if (fd < 0)
1053 return -errno;
1054
1055 fd = fd_move_above_stdio(fd);
1056
1057 ev = (struct epoll_event) {
1058 .events = EPOLLIN,
1059 .data.ptr = d,
1060 };
1061
1062 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1063 if (r < 0) {
1064 safe_close(fd);
1065 return -errno;
1066 }
1067
1068 d->fd = fd;
1069 return 0;
1070 }
1071
1072 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1073 assert(s);
1074
1075 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1076 }
1077
1078 _public_ int sd_event_add_time(
1079 sd_event *e,
1080 sd_event_source **ret,
1081 clockid_t clock,
1082 uint64_t usec,
1083 uint64_t accuracy,
1084 sd_event_time_handler_t callback,
1085 void *userdata) {
1086
1087 EventSourceType type;
1088 _cleanup_(source_freep) sd_event_source *s = NULL;
1089 struct clock_data *d;
1090 int r;
1091
1092 assert_return(e, -EINVAL);
1093 assert_return(e = event_resolve(e), -ENOPKG);
1094 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1095 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1096 assert_return(!event_pid_changed(e), -ECHILD);
1097
1098 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1099 return -EOPNOTSUPP;
1100
1101 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1102 if (type < 0)
1103 return -EOPNOTSUPP;
1104
1105 if (!callback)
1106 callback = time_exit_callback;
1107
1108 d = event_get_clock_data(e, type);
1109 assert(d);
1110
1111 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1112 if (r < 0)
1113 return r;
1114
1115 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1116 if (r < 0)
1117 return r;
1118
1119 if (d->fd < 0) {
1120 r = event_setup_timer_fd(e, d, clock);
1121 if (r < 0)
1122 return r;
1123 }
1124
1125 s = source_new(e, !ret, type);
1126 if (!s)
1127 return -ENOMEM;
1128
1129 s->time.next = usec;
1130 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1131 s->time.callback = callback;
1132 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1133 s->userdata = userdata;
1134 s->enabled = SD_EVENT_ONESHOT;
1135
1136 d->needs_rearm = true;
1137
1138 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1139 if (r < 0)
1140 return r;
1141
1142 r = prioq_put(d->latest, s, &s->time.latest_index);
1143 if (r < 0)
1144 return r;
1145
1146 if (ret)
1147 *ret = s;
1148 TAKE_PTR(s);
1149
1150 return 0;
1151 }
1152
1153 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1154 assert(s);
1155
1156 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1157 }
1158
1159 _public_ int sd_event_add_signal(
1160 sd_event *e,
1161 sd_event_source **ret,
1162 int sig,
1163 sd_event_signal_handler_t callback,
1164 void *userdata) {
1165
1166 _cleanup_(source_freep) sd_event_source *s = NULL;
1167 struct signal_data *d;
1168 int r;
1169
1170 assert_return(e, -EINVAL);
1171 assert_return(e = event_resolve(e), -ENOPKG);
1172 assert_return(SIGNAL_VALID(sig), -EINVAL);
1173 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1174 assert_return(!event_pid_changed(e), -ECHILD);
1175
1176 if (!callback)
1177 callback = signal_exit_callback;
1178
1179 r = signal_is_blocked(sig);
1180 if (r < 0)
1181 return r;
1182 if (r == 0)
1183 return -EBUSY;
1184
1185 if (!e->signal_sources) {
1186 e->signal_sources = new0(sd_event_source*, _NSIG);
1187 if (!e->signal_sources)
1188 return -ENOMEM;
1189 } else if (e->signal_sources[sig])
1190 return -EBUSY;
1191
1192 s = source_new(e, !ret, SOURCE_SIGNAL);
1193 if (!s)
1194 return -ENOMEM;
1195
1196 s->signal.sig = sig;
1197 s->signal.callback = callback;
1198 s->userdata = userdata;
1199 s->enabled = SD_EVENT_ON;
1200
1201 e->signal_sources[sig] = s;
1202
1203 r = event_make_signal_data(e, sig, &d);
1204 if (r < 0)
1205 return r;
1206
1207 /* Use the signal name as description for the event source by default */
1208 (void) sd_event_source_set_description(s, signal_to_string(sig));
1209
1210 if (ret)
1211 *ret = s;
1212 TAKE_PTR(s);
1213
1214 return 0;
1215 }
1216
1217 static bool shall_use_pidfd(void) {
1218 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1219 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1220 }
1221
1222 _public_ int sd_event_add_child(
1223 sd_event *e,
1224 sd_event_source **ret,
1225 pid_t pid,
1226 int options,
1227 sd_event_child_handler_t callback,
1228 void *userdata) {
1229
1230 _cleanup_(source_freep) sd_event_source *s = NULL;
1231 int r;
1232
1233 assert_return(e, -EINVAL);
1234 assert_return(e = event_resolve(e), -ENOPKG);
1235 assert_return(pid > 1, -EINVAL);
1236 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1237 assert_return(options != 0, -EINVAL);
1238 assert_return(callback, -EINVAL);
1239 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1240 assert_return(!event_pid_changed(e), -ECHILD);
1241
1242 if (e->n_enabled_child_sources == 0) {
1243 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1244 * for compatibility with pre-pidfd and because we don't want to reap the child processes
1245 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1246 * take effect.
1247 *
1248 * (As an optimization we only do this check on the first child event source created.) */
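/* For illustration only, a minimal caller-side sketch of what "blocking SIGCHLD" means here,
 * using plain POSIX calls rather than anything from this file:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGCHLD);
 *     sigprocmask(SIG_BLOCK, &ss, NULL);   (or pthread_sigmask() in threaded programs)
 *
 * Only then will the signal_is_blocked() check below succeed. */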
1249 r = signal_is_blocked(SIGCHLD);
1250 if (r < 0)
1251 return r;
1252 if (r == 0)
1253 return -EBUSY;
1254 }
1255
1256 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1257 if (r < 0)
1258 return r;
1259
1260 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1261 return -EBUSY;
1262
1263 s = source_new(e, !ret, SOURCE_CHILD);
1264 if (!s)
1265 return -ENOMEM;
1266
1267 s->wakeup = WAKEUP_EVENT_SOURCE;
1268 s->child.pid = pid;
1269 s->child.options = options;
1270 s->child.callback = callback;
1271 s->userdata = userdata;
1272 s->enabled = SD_EVENT_ONESHOT;
1273
1274 /* We always take a pidfd here if we can, even if we wait for something other than WEXITED, so that we
1275 * pin the PID, and make regular waitid() handling race-free. */
1276
1277 if (shall_use_pidfd()) {
1278 s->child.pidfd = pidfd_open(s->child.pid, 0);
1279 if (s->child.pidfd < 0) {
1280 /* Propagate errors unless the syscall is not supported or blocked */
1281 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1282 return -errno;
1283 } else
1284 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1285 } else
1286 s->child.pidfd = -1;
1287
1288 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1289 if (r < 0)
1290 return r;
1291
1292 e->n_enabled_child_sources++;
1293
1294 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1295 /* We have a pidfd and we only want to watch for exit */
1296
1297 r = source_child_pidfd_register(s, s->enabled);
1298 if (r < 0) {
1299 e->n_enabled_child_sources--;
1300 return r;
1301 }
1302 } else {
1303 /* We have no pidfd or we shall wait for some other event than WEXITED */
1304
1305 r = event_make_signal_data(e, SIGCHLD, NULL);
1306 if (r < 0) {
1307 e->n_enabled_child_sources--;
1308 return r;
1309 }
1310
1311 e->need_process_child = true;
1312 }
1313
1314 if (ret)
1315 *ret = s;
1316
1317 TAKE_PTR(s);
1318 return 0;
1319 }
1320
1321 _public_ int sd_event_add_child_pidfd(
1322 sd_event *e,
1323 sd_event_source **ret,
1324 int pidfd,
1325 int options,
1326 sd_event_child_handler_t callback,
1327 void *userdata) {
1328
1329
1330 _cleanup_(source_freep) sd_event_source *s = NULL;
1331 pid_t pid;
1332 int r;
1333
1334 assert_return(e, -EINVAL);
1335 assert_return(e = event_resolve(e), -ENOPKG);
1336 assert_return(pidfd >= 0, -EBADF);
1337 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1338 assert_return(options != 0, -EINVAL);
1339 assert_return(callback, -EINVAL);
1340 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1341 assert_return(!event_pid_changed(e), -ECHILD);
1342
1343 if (e->n_enabled_child_sources == 0) {
1344 r = signal_is_blocked(SIGCHLD);
1345 if (r < 0)
1346 return r;
1347 if (r == 0)
1348 return -EBUSY;
1349 }
1350
1351 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1352 if (r < 0)
1353 return r;
1354
1355 r = pidfd_get_pid(pidfd, &pid);
1356 if (r < 0)
1357 return r;
1358
1359 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1360 return -EBUSY;
1361
1362 s = source_new(e, !ret, SOURCE_CHILD);
1363 if (!s)
1364 return -ENOMEM;
1365
1366 s->wakeup = WAKEUP_EVENT_SOURCE;
1367 s->child.pidfd = pidfd;
1368 s->child.pid = pid;
1369 s->child.options = options;
1370 s->child.callback = callback;
1371 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1372 s->userdata = userdata;
1373 s->enabled = SD_EVENT_ONESHOT;
1374
1375 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1376 if (r < 0)
1377 return r;
1378
1379 e->n_enabled_child_sources++;
1380
1381 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1382 /* We only want to watch for WEXITED */
1383
1384 r = source_child_pidfd_register(s, s->enabled);
1385 if (r < 0) {
1386 e->n_enabled_child_sources--;
1387 return r;
1388 }
1389 } else {
1390 /* We shall wait for some other event than WEXITED */
1391
1392 r = event_make_signal_data(e, SIGCHLD, NULL);
1393 if (r < 0) {
1394 e->n_enabled_child_sources--;
1395 return r;
1396 }
1397
1398 e->need_process_child = true;
1399 }
1400
1401 if (ret)
1402 *ret = s;
1403
1404 TAKE_PTR(s);
1405 return 0;
1406 }
1407
1408 _public_ int sd_event_add_defer(
1409 sd_event *e,
1410 sd_event_source **ret,
1411 sd_event_handler_t callback,
1412 void *userdata) {
1413
1414 _cleanup_(source_freep) sd_event_source *s = NULL;
1415 int r;
1416
1417 assert_return(e, -EINVAL);
1418 assert_return(e = event_resolve(e), -ENOPKG);
1419 assert_return(callback, -EINVAL);
1420 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1421 assert_return(!event_pid_changed(e), -ECHILD);
1422
1423 s = source_new(e, !ret, SOURCE_DEFER);
1424 if (!s)
1425 return -ENOMEM;
1426
1427 s->defer.callback = callback;
1428 s->userdata = userdata;
1429 s->enabled = SD_EVENT_ONESHOT;
1430
1431 r = source_set_pending(s, true);
1432 if (r < 0)
1433 return r;
1434
1435 if (ret)
1436 *ret = s;
1437 TAKE_PTR(s);
1438
1439 return 0;
1440 }
1441
1442 _public_ int sd_event_add_post(
1443 sd_event *e,
1444 sd_event_source **ret,
1445 sd_event_handler_t callback,
1446 void *userdata) {
1447
1448 _cleanup_(source_freep) sd_event_source *s = NULL;
1449 int r;
1450
1451 assert_return(e, -EINVAL);
1452 assert_return(e = event_resolve(e), -ENOPKG);
1453 assert_return(callback, -EINVAL);
1454 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1455 assert_return(!event_pid_changed(e), -ECHILD);
1456
1457 r = set_ensure_allocated(&e->post_sources, NULL);
1458 if (r < 0)
1459 return r;
1460
1461 s = source_new(e, !ret, SOURCE_POST);
1462 if (!s)
1463 return -ENOMEM;
1464
1465 s->post.callback = callback;
1466 s->userdata = userdata;
1467 s->enabled = SD_EVENT_ON;
1468
1469 r = set_put(e->post_sources, s);
1470 if (r < 0)
1471 return r;
1472
1473 if (ret)
1474 *ret = s;
1475 TAKE_PTR(s);
1476
1477 return 0;
1478 }
1479
1480 _public_ int sd_event_add_exit(
1481 sd_event *e,
1482 sd_event_source **ret,
1483 sd_event_handler_t callback,
1484 void *userdata) {
1485
1486 _cleanup_(source_freep) sd_event_source *s = NULL;
1487 int r;
1488
1489 assert_return(e, -EINVAL);
1490 assert_return(e = event_resolve(e), -ENOPKG);
1491 assert_return(callback, -EINVAL);
1492 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1493 assert_return(!event_pid_changed(e), -ECHILD);
1494
1495 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1496 if (r < 0)
1497 return r;
1498
1499 s = source_new(e, !ret, SOURCE_EXIT);
1500 if (!s)
1501 return -ENOMEM;
1502
1503 s->exit.callback = callback;
1504 s->userdata = userdata;
1505 s->exit.prioq_index = PRIOQ_IDX_NULL;
1506 s->enabled = SD_EVENT_ONESHOT;
1507
1508 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1509 if (r < 0)
1510 return r;
1511
1512 if (ret)
1513 *ret = s;
1514 TAKE_PTR(s);
1515
1516 return 0;
1517 }
1518
1519 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1520 assert(e);
1521
1522 if (!d)
1523 return;
1524
1525 assert(hashmap_isempty(d->inodes));
1526 assert(hashmap_isempty(d->wd));
1527
1528 if (d->buffer_filled > 0)
1529 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1530
1531 hashmap_free(d->inodes);
1532 hashmap_free(d->wd);
1533
1534 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1535
1536 if (d->fd >= 0) {
1537 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1538 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1539
1540 safe_close(d->fd);
1541 }
1542 free(d);
1543 }
1544
1545 static int event_make_inotify_data(
1546 sd_event *e,
1547 int64_t priority,
1548 struct inotify_data **ret) {
1549
1550 _cleanup_close_ int fd = -1;
1551 struct inotify_data *d;
1552 struct epoll_event ev;
1553 int r;
1554
1555 assert(e);
1556
1557 d = hashmap_get(e->inotify_data, &priority);
1558 if (d) {
1559 if (ret)
1560 *ret = d;
1561 return 0;
1562 }
1563
1564 fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
1565 if (fd < 0)
1566 return -errno;
1567
1568 fd = fd_move_above_stdio(fd);
1569
1570 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1571 if (r < 0)
1572 return r;
1573
1574 d = new(struct inotify_data, 1);
1575 if (!d)
1576 return -ENOMEM;
1577
1578 *d = (struct inotify_data) {
1579 .wakeup = WAKEUP_INOTIFY_DATA,
1580 .fd = TAKE_FD(fd),
1581 .priority = priority,
1582 };
1583
1584 r = hashmap_put(e->inotify_data, &d->priority, d);
1585 if (r < 0) {
1586 d->fd = safe_close(d->fd);
1587 free(d);
1588 return r;
1589 }
1590
1591 ev = (struct epoll_event) {
1592 .events = EPOLLIN,
1593 .data.ptr = d,
1594 };
1595
1596 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1597 r = -errno;
1598 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1599 * remove the fd from the epoll first, which we don't want as we couldn't
1600 * add it in the first place. */
1601 event_free_inotify_data(e, d);
1602 return r;
1603 }
1604
1605 if (ret)
1606 *ret = d;
1607
1608 return 1;
1609 }
1610
1611 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1612 int r;
1613
1614 assert(x);
1615 assert(y);
1616
1617 r = CMP(x->dev, y->dev);
1618 if (r != 0)
1619 return r;
1620
1621 return CMP(x->ino, y->ino);
1622 }
1623
1624 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1625 assert(d);
1626
1627 siphash24_compress(&d->dev, sizeof(d->dev), state);
1628 siphash24_compress(&d->ino, sizeof(d->ino), state);
1629 }
1630
1631 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1632
1633 static void event_free_inode_data(
1634 sd_event *e,
1635 struct inode_data *d) {
1636
1637 assert(e);
1638
1639 if (!d)
1640 return;
1641
1642 assert(!d->event_sources);
1643
1644 if (d->fd >= 0) {
1645 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1646 safe_close(d->fd);
1647 }
1648
1649 if (d->inotify_data) {
1650
1651 if (d->wd >= 0) {
1652 if (d->inotify_data->fd >= 0) {
1653 /* So here's a problem. At the time this runs the watch descriptor might already be
1654 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1655 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1656 * likely case to happen. */
1657
1658 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1659 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1660 }
1661
1662 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1663 }
1664
1665 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1666 }
1667
1668 free(d);
1669 }
1670
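/* Releases an inode data object once no event source references it anymore, and drops the owning
 * inotify data object as well if it no longer tracks any inodes. */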
1671 static void event_gc_inode_data(
1672 sd_event *e,
1673 struct inode_data *d) {
1674
1675 struct inotify_data *inotify_data;
1676
1677 assert(e);
1678
1679 if (!d)
1680 return;
1681
1682 if (d->event_sources)
1683 return;
1684
1685 inotify_data = d->inotify_data;
1686 event_free_inode_data(e, d);
1687
1688 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1689 event_free_inotify_data(e, inotify_data);
1690 }
1691
1692 static int event_make_inode_data(
1693 sd_event *e,
1694 struct inotify_data *inotify_data,
1695 dev_t dev,
1696 ino_t ino,
1697 struct inode_data **ret) {
1698
1699 struct inode_data *d, key;
1700 int r;
1701
1702 assert(e);
1703 assert(inotify_data);
1704
1705 key = (struct inode_data) {
1706 .ino = ino,
1707 .dev = dev,
1708 };
1709
1710 d = hashmap_get(inotify_data->inodes, &key);
1711 if (d) {
1712 if (ret)
1713 *ret = d;
1714
1715 return 0;
1716 }
1717
1718 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1719 if (r < 0)
1720 return r;
1721
1722 d = new(struct inode_data, 1);
1723 if (!d)
1724 return -ENOMEM;
1725
1726 *d = (struct inode_data) {
1727 .dev = dev,
1728 .ino = ino,
1729 .wd = -1,
1730 .fd = -1,
1731 .inotify_data = inotify_data,
1732 };
1733
1734 r = hashmap_put(inotify_data->inodes, d, d);
1735 if (r < 0) {
1736 free(d);
1737 return r;
1738 }
1739
1740 if (ret)
1741 *ret = d;
1742
1743 return 1;
1744 }
1745
1746 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1747 bool excl_unlink = true;
1748 uint32_t combined = 0;
1749 sd_event_source *s;
1750
1751 assert(d);
1752
1753 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1754 * the IN_EXCL_UNLINK flag is ANDed instead.
1755 *
1756 * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
1757 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1758 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1759 * events we don't care for client-side. */
1760
1761 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1762
1763 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1764 excl_unlink = false;
1765
1766 combined |= s->inotify.mask;
1767 }
1768
1769 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1770 }
1771
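/* Establishes or updates the kernel inotify watch for this inode so that it matches the combined
 * mask of all event sources attached to it. Returns 0 if nothing had to change, > 0 if the watch
 * was (re)created or updated. */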
1772 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1773 uint32_t combined_mask;
1774 int wd, r;
1775
1776 assert(d);
1777 assert(d->fd >= 0);
1778
1779 combined_mask = inode_data_determine_mask(d);
1780
1781 if (d->wd >= 0 && combined_mask == d->combined_mask)
1782 return 0;
1783
1784 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1785 if (r < 0)
1786 return r;
1787
1788 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1789 if (wd < 0)
1790 return -errno;
1791
1792 if (d->wd < 0) {
1793 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1794 if (r < 0) {
1795 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1796 return r;
1797 }
1798
1799 d->wd = wd;
1800
1801 } else if (d->wd != wd) {
1802
1803 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1804 (void) inotify_rm_watch(d->fd, wd);
1805 return -EINVAL;
1806 }
1807
1808 d->combined_mask = combined_mask;
1809 return 1;
1810 }
1811
1812 _public_ int sd_event_add_inotify(
1813 sd_event *e,
1814 sd_event_source **ret,
1815 const char *path,
1816 uint32_t mask,
1817 sd_event_inotify_handler_t callback,
1818 void *userdata) {
1819
1820 struct inotify_data *inotify_data = NULL;
1821 struct inode_data *inode_data = NULL;
1822 _cleanup_close_ int fd = -1;
1823 _cleanup_(source_freep) sd_event_source *s = NULL;
1824 struct stat st;
1825 int r;
1826
1827 assert_return(e, -EINVAL);
1828 assert_return(e = event_resolve(e), -ENOPKG);
1829 assert_return(path, -EINVAL);
1830 assert_return(callback, -EINVAL);
1831 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1832 assert_return(!event_pid_changed(e), -ECHILD);
1833
1834 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1835 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1836 * users cannot pass it in themselves. */
1837 if (mask & IN_MASK_ADD)
1838 return -EINVAL;
1839
1840 fd = open(path, O_PATH|O_CLOEXEC|
1841 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1842 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1843 if (fd < 0)
1844 return -errno;
1845
1846 if (fstat(fd, &st) < 0)
1847 return -errno;
1848
1849 s = source_new(e, !ret, SOURCE_INOTIFY);
1850 if (!s)
1851 return -ENOMEM;
1852
1853 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1854 s->inotify.mask = mask;
1855 s->inotify.callback = callback;
1856 s->userdata = userdata;
1857
1858 /* Allocate an inotify object for this priority, and an inode object within it */
1859 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1860 if (r < 0)
1861 return r;
1862
1863 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1864 if (r < 0) {
1865 event_free_inotify_data(e, inotify_data);
1866 return r;
1867 }
1868
1869 /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the event
1870 * source can still be changed until then, for which we need the original inode. */
1871 if (inode_data->fd < 0) {
1872 inode_data->fd = TAKE_FD(fd);
1873 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1874 }
1875
1876 /* Link our event source to the inode data object */
1877 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1878 s->inotify.inode_data = inode_data;
1879
1880 /* Actually realize the watch now */
1881 r = inode_data_realize_watch(e, inode_data);
1882 if (r < 0)
1883 return r;
1884
1885 (void) sd_event_source_set_description(s, path);
1886
1887 if (ret)
1888 *ret = s;
1889 TAKE_PTR(s);
1890
1891 return 0;
1892 }
1893
1894 static sd_event_source* event_source_free(sd_event_source *s) {
1895 if (!s)
1896 return NULL;
1897
1898 /* Here's a special hack: when we are called from a
1899 * dispatch handler we won't free the event source
1900 * immediately, but we will detach the fd from the
1901 * epoll. This way it is safe for the caller to unref
1902 * the event source and immediately close the fd, but
1903 * we still retain a valid event source object after
1904 * the callback. */
1905
1906 if (s->dispatching) {
1907 if (s->type == SOURCE_IO)
1908 source_io_unregister(s);
1909
1910 source_disconnect(s);
1911 } else
1912 source_free(s);
1913
1914 return NULL;
1915 }
1916
1917 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1918
1919 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1920 assert_return(s, -EINVAL);
1921 assert_return(!event_pid_changed(s->event), -ECHILD);
1922
1923 return free_and_strdup(&s->description, description);
1924 }
1925
1926 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1927 assert_return(s, -EINVAL);
1928 assert_return(description, -EINVAL);
1929 assert_return(!event_pid_changed(s->event), -ECHILD);
1930
1931 if (!s->description)
1932 return -ENXIO;
1933
1934 *description = s->description;
1935 return 0;
1936 }
1937
1938 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1939 assert_return(s, NULL);
1940
1941 return s->event;
1942 }
1943
1944 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1945 assert_return(s, -EINVAL);
1946 assert_return(s->type != SOURCE_EXIT, -EDOM);
1947 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1948 assert_return(!event_pid_changed(s->event), -ECHILD);
1949
1950 return s->pending;
1951 }
1952
1953 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1954 assert_return(s, -EINVAL);
1955 assert_return(s->type == SOURCE_IO, -EDOM);
1956 assert_return(!event_pid_changed(s->event), -ECHILD);
1957
1958 return s->io.fd;
1959 }
1960
1961 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1962 int r;
1963
1964 assert_return(s, -EINVAL);
1965 assert_return(fd >= 0, -EBADF);
1966 assert_return(s->type == SOURCE_IO, -EDOM);
1967 assert_return(!event_pid_changed(s->event), -ECHILD);
1968
1969 if (s->io.fd == fd)
1970 return 0;
1971
1972 if (s->enabled == SD_EVENT_OFF) {
1973 s->io.fd = fd;
1974 s->io.registered = false;
1975 } else {
1976 int saved_fd;
1977
1978 saved_fd = s->io.fd;
1979 assert(s->io.registered);
1980
1981 s->io.fd = fd;
1982 s->io.registered = false;
1983
1984 r = source_io_register(s, s->enabled, s->io.events);
1985 if (r < 0) {
1986 s->io.fd = saved_fd;
1987 s->io.registered = true;
1988 return r;
1989 }
1990
1991 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1992 }
1993
1994 return 0;
1995 }
1996
1997 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1998 assert_return(s, -EINVAL);
1999 assert_return(s->type == SOURCE_IO, -EDOM);
2000
2001 return s->io.owned;
2002 }
2003
2004 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2005 assert_return(s, -EINVAL);
2006 assert_return(s->type == SOURCE_IO, -EDOM);
2007
2008 s->io.owned = own;
2009 return 0;
2010 }
2011
2012 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2013 assert_return(s, -EINVAL);
2014 assert_return(events, -EINVAL);
2015 assert_return(s->type == SOURCE_IO, -EDOM);
2016 assert_return(!event_pid_changed(s->event), -ECHILD);
2017
2018 *events = s->io.events;
2019 return 0;
2020 }
2021
2022 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2023 int r;
2024
2025 assert_return(s, -EINVAL);
2026 assert_return(s->type == SOURCE_IO, -EDOM);
2027 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2028 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2029 assert_return(!event_pid_changed(s->event), -ECHILD);
2030
2031 /* edge-triggered updates are never skipped, so we can reset edges */
2032 if (s->io.events == events && !(events & EPOLLET))
2033 return 0;
2034
2035 r = source_set_pending(s, false);
2036 if (r < 0)
2037 return r;
2038
2039 if (s->enabled != SD_EVENT_OFF) {
2040 r = source_io_register(s, s->enabled, events);
2041 if (r < 0)
2042 return r;
2043 }
2044
2045 s->io.events = events;
2046
2047 return 0;
2048 }
2049
2050 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2051 assert_return(s, -EINVAL);
2052 assert_return(revents, -EINVAL);
2053 assert_return(s->type == SOURCE_IO, -EDOM);
2054 assert_return(s->pending, -ENODATA);
2055 assert_return(!event_pid_changed(s->event), -ECHILD);
2056
2057 *revents = s->io.revents;
2058 return 0;
2059 }
2060
2061 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2062 assert_return(s, -EINVAL);
2063 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2064 assert_return(!event_pid_changed(s->event), -ECHILD);
2065
2066 return s->signal.sig;
2067 }
2068
2069 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2070 assert_return(s, -EINVAL);
2071 assert_return(!event_pid_changed(s->event), -ECHILD);
2072
2073 *priority = s->priority;
2074 return 0;
2075 }
2076
2077 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2078 bool rm_inotify = false, rm_inode = false;
2079 struct inotify_data *new_inotify_data = NULL;
2080 struct inode_data *new_inode_data = NULL;
2081 int r;
2082
2083 assert_return(s, -EINVAL);
2084 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2085 assert_return(!event_pid_changed(s->event), -ECHILD);
2086
2087 if (s->priority == priority)
2088 return 0;
2089
2090 if (s->type == SOURCE_INOTIFY) {
2091 struct inode_data *old_inode_data;
2092
2093 assert(s->inotify.inode_data);
2094 old_inode_data = s->inotify.inode_data;
2095
2096 /* We need the original fd to change the priority. If we don't have it we can't change the priority
2097 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2098 * events we allow priority changes only until the first following iteration. */
2099 if (old_inode_data->fd < 0)
2100 return -EOPNOTSUPP;
2101
2102 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2103 if (r < 0)
2104 return r;
2105 rm_inotify = r > 0;
2106
2107 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2108 if (r < 0)
2109 goto fail;
2110 rm_inode = r > 0;
2111
2112 if (new_inode_data->fd < 0) {
2113 /* Duplicate the fd for the new inode object if we don't have any yet */
2114 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2115 if (new_inode_data->fd < 0) {
2116 r = -errno;
2117 goto fail;
2118 }
2119
2120 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2121 }
2122
2123 /* Move the event source to the new inode data structure */
2124 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2125 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2126 s->inotify.inode_data = new_inode_data;
2127
2128 /* Now create the new watch */
2129 r = inode_data_realize_watch(s->event, new_inode_data);
2130 if (r < 0) {
2131 /* Move it back */
2132 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2133 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2134 s->inotify.inode_data = old_inode_data;
2135 goto fail;
2136 }
2137
2138 s->priority = priority;
2139
2140 event_gc_inode_data(s->event, old_inode_data);
2141
2142 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2143 struct signal_data *old, *d;
2144
2145 /* Move us from the signalfd belonging to the old
2146 * priority to the signalfd of the new priority */
2147
2148 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2149
2150 s->priority = priority;
2151
2152 r = event_make_signal_data(s->event, s->signal.sig, &d);
2153 if (r < 0) {
2154 s->priority = old->priority;
2155 return r;
2156 }
2157
2158 event_unmask_signal_data(s->event, old, s->signal.sig);
2159 } else
2160 s->priority = priority;
2161
2162 if (s->pending)
2163 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2164
2165 if (s->prepare)
2166 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2167
2168 if (s->type == SOURCE_EXIT)
2169 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2170
2171 return 0;
2172
2173 fail:
2174 if (rm_inode)
2175 event_free_inode_data(s->event, new_inode_data);
2176
2177 if (rm_inotify)
2178 event_free_inotify_data(s->event, new_inotify_data);
2179
2180 return r;
2181 }
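
A caller-side sketch (not from this file; handler names are made up) of how priorities order dispatching: two defer sources are added and given different priorities, so the one with the lower numerical value is dispatched first whenever both are pending.

#include <systemd/sd-event.h>

static int urgent_handler(sd_event_source *s, void *userdata) { return 0; }
static int lazy_handler(sd_event_source *s, void *userdata) { return 0; }

static int add_prioritized_sources(sd_event *e, sd_event_source **urgent, sd_event_source **lazy) {
        int r;

        r = sd_event_add_defer(e, urgent, urgent_handler, NULL);
        if (r < 0)
                return r;

        r = sd_event_add_defer(e, lazy, lazy_handler, NULL);
        if (r < 0)
                return r;

        /* Lower values are dispatched first; SD_EVENT_PRIORITY_NORMAL is 0. */
        r = sd_event_source_set_priority(*urgent, SD_EVENT_PRIORITY_IMPORTANT);
        if (r < 0)
                return r;

        return sd_event_source_set_priority(*lazy, SD_EVENT_PRIORITY_IDLE);
}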
2182
2183 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2184 assert_return(s, -EINVAL);
2185 assert_return(!event_pid_changed(s->event), -ECHILD);
2186
2187 if (m)
2188 *m = s->enabled;
2189 return s->enabled != SD_EVENT_OFF;
2190 }
2191
2192 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2193 int r;
2194
2195 assert_return(s, -EINVAL);
2196 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2197 assert_return(!event_pid_changed(s->event), -ECHILD);
2198
2199 /* If we are dead anyway, we are fine with turning off
2200 * sources, but everything else needs to fail. */
2201 if (s->event->state == SD_EVENT_FINISHED)
2202 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2203
2204 if (s->enabled == m)
2205 return 0;
2206
2207 if (m == SD_EVENT_OFF) {
2208
2209 /* Unset the pending flag when this event source is disabled */
2210 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2211 r = source_set_pending(s, false);
2212 if (r < 0)
2213 return r;
2214 }
2215
2216 switch (s->type) {
2217
2218 case SOURCE_IO:
2219 source_io_unregister(s);
2220 s->enabled = m;
2221 break;
2222
2223 case SOURCE_TIME_REALTIME:
2224 case SOURCE_TIME_BOOTTIME:
2225 case SOURCE_TIME_MONOTONIC:
2226 case SOURCE_TIME_REALTIME_ALARM:
2227 case SOURCE_TIME_BOOTTIME_ALARM: {
2228 struct clock_data *d;
2229
2230 s->enabled = m;
2231 d = event_get_clock_data(s->event, s->type);
2232 assert(d);
2233
2234 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2235 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2236 d->needs_rearm = true;
2237 break;
2238 }
2239
2240 case SOURCE_SIGNAL:
2241 s->enabled = m;
2242
2243 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2244 break;
2245
2246 case SOURCE_CHILD:
2247 s->enabled = m;
2248
2249 assert(s->event->n_enabled_child_sources > 0);
2250 s->event->n_enabled_child_sources--;
2251
2252 if (EVENT_SOURCE_WATCH_PIDFD(s))
2253 source_child_pidfd_unregister(s);
2254 else
2255 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2256
2257 break;
2258
2259 case SOURCE_EXIT:
2260 s->enabled = m;
2261 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2262 break;
2263
2264 case SOURCE_DEFER:
2265 case SOURCE_POST:
2266 case SOURCE_INOTIFY:
2267 s->enabled = m;
2268 break;
2269
2270 default:
2271 assert_not_reached("Wut? I shouldn't exist.");
2272 }
2273
2274 } else {
2275
2276 /* Unset the pending flag when this event source is enabled */
2277 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2278 r = source_set_pending(s, false);
2279 if (r < 0)
2280 return r;
2281 }
2282
2283 switch (s->type) {
2284
2285 case SOURCE_IO:
2286 r = source_io_register(s, m, s->io.events);
2287 if (r < 0)
2288 return r;
2289
2290 s->enabled = m;
2291 break;
2292
2293 case SOURCE_TIME_REALTIME:
2294 case SOURCE_TIME_BOOTTIME:
2295 case SOURCE_TIME_MONOTONIC:
2296 case SOURCE_TIME_REALTIME_ALARM:
2297 case SOURCE_TIME_BOOTTIME_ALARM: {
2298 struct clock_data *d;
2299
2300 s->enabled = m;
2301 d = event_get_clock_data(s->event, s->type);
2302 assert(d);
2303
2304 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2305 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2306 d->needs_rearm = true;
2307 break;
2308 }
2309
2310 case SOURCE_SIGNAL:
2311
2312 s->enabled = m;
2313
2314 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2315 if (r < 0) {
2316 s->enabled = SD_EVENT_OFF;
2317 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2318 return r;
2319 }
2320
2321 break;
2322
2323 case SOURCE_CHILD:
2324
2325 if (s->enabled == SD_EVENT_OFF)
2326 s->event->n_enabled_child_sources++;
2327
2328 s->enabled = m;
2329
2330 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2331 /* yes, we have a pidfd */
2332
2333 r = source_child_pidfd_register(s, s->enabled);
2334 if (r < 0) {
2335 s->enabled = SD_EVENT_OFF;
2336 s->event->n_enabled_child_sources--;
2337 return r;
2338 }
2339 } else {
2340 /* no pidfd, or we are watching for something other than WEXITED */
2341
2342 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2343 if (r < 0) {
2344 s->enabled = SD_EVENT_OFF;
2345 s->event->n_enabled_child_sources--;
2346 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2347 return r;
2348 }
2349 }
2350
2351 break;
2352
2353 case SOURCE_EXIT:
2354 s->enabled = m;
2355 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2356 break;
2357
2358 case SOURCE_DEFER:
2359 case SOURCE_POST:
2360 case SOURCE_INOTIFY:
2361 s->enabled = m;
2362 break;
2363
2364 default:
2365 assert_not_reached("Wut? I shouldn't exist.");
2366 }
2367 }
2368
2369 if (s->pending)
2370 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2371
2372 if (s->prepare)
2373 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2374
2375 return 0;
2376 }
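
A caller-side sketch of the three enable states, assuming 't' is an existing timer or IO source created elsewhere: SD_EVENT_ONESHOT dispatches once and then flips the source back to SD_EVENT_OFF (as done in source_dispatch() below), SD_EVENT_OFF mutes it without destroying it, SD_EVENT_ON keeps it armed.

#include <systemd/sd-event.h>

static void toggle_examples(sd_event_source *t) {
        /* Fire once; after dispatching, the source is switched back to SD_EVENT_OFF. */
        (void) sd_event_source_set_enabled(t, SD_EVENT_ONESHOT);

        /* Mute it entirely; the source object stays around and can be re-enabled later. */
        (void) sd_event_source_set_enabled(t, SD_EVENT_OFF);

        /* Keep it armed until explicitly disabled. */
        (void) sd_event_source_set_enabled(t, SD_EVENT_ON);
}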
2377
2378 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2379 assert_return(s, -EINVAL);
2380 assert_return(usec, -EINVAL);
2381 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2382 assert_return(!event_pid_changed(s->event), -ECHILD);
2383
2384 *usec = s->time.next;
2385 return 0;
2386 }
2387
2388 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2389 struct clock_data *d;
2390 int r;
2391
2392 assert_return(s, -EINVAL);
2393 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2394 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2395 assert_return(!event_pid_changed(s->event), -ECHILD);
2396
2397 r = source_set_pending(s, false);
2398 if (r < 0)
2399 return r;
2400
2401 s->time.next = usec;
2402
2403 d = event_get_clock_data(s->event, s->type);
2404 assert(d);
2405
2406 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2407 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2408 d->needs_rearm = true;
2409
2410 return 0;
2411 }
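
A sketch of the common re-arming pattern for timer sources (caller code, not part of this file; the 5 s interval is arbitrary): a CLOCK_MONOTONIC timer whose callback pushes its own deadline forward with sd_event_source_set_time() and enables itself again as a oneshot.

#include <time.h>
#include <systemd/sd-event.h>

#define INTERVAL_USEC (5ULL * 1000000ULL) /* hypothetical 5s interval */

static int periodic_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        uint64_t now_usec;
        int r;

        /* ... do the periodic work ... */

        r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        r = sd_event_source_set_time(s, now_usec + INTERVAL_USEC);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}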
2412
2413 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2414 assert_return(s, -EINVAL);
2415 assert_return(usec, -EINVAL);
2416 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2417 assert_return(!event_pid_changed(s->event), -ECHILD);
2418
2419 *usec = s->time.accuracy;
2420 return 0;
2421 }
2422
2423 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2424 struct clock_data *d;
2425 int r;
2426
2427 assert_return(s, -EINVAL);
2428 assert_return(usec != (uint64_t) -1, -EINVAL);
2429 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2430 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2431 assert_return(!event_pid_changed(s->event), -ECHILD);
2432
2433 r = source_set_pending(s, false);
2434 if (r < 0)
2435 return r;
2436
2437 if (usec == 0)
2438 usec = DEFAULT_ACCURACY_USEC;
2439
2440 s->time.accuracy = usec;
2441
2442 d = event_get_clock_data(s->event, s->type);
2443 assert(d);
2444
2445 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2446 d->needs_rearm = true;
2447
2448 return 0;
2449 }
2450
2451 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2452 assert_return(s, -EINVAL);
2453 assert_return(clock, -EINVAL);
2454 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2455 assert_return(!event_pid_changed(s->event), -ECHILD);
2456
2457 *clock = event_source_type_to_clock(s->type);
2458 return 0;
2459 }
2460
2461 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2462 assert_return(s, -EINVAL);
2463 assert_return(pid, -EINVAL);
2464 assert_return(s->type == SOURCE_CHILD, -EDOM);
2465 assert_return(!event_pid_changed(s->event), -ECHILD);
2466
2467 *pid = s->child.pid;
2468 return 0;
2469 }
2470
2471 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2472 assert_return(s, -EINVAL);
2473 assert_return(s->type == SOURCE_CHILD, -EDOM);
2474 assert_return(!event_pid_changed(s->event), -ECHILD);
2475
2476 if (s->child.pidfd < 0)
2477 return -EOPNOTSUPP;
2478
2479 return s->child.pidfd;
2480 }
2481
2482 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2483 assert_return(s, -EINVAL);
2484 assert_return(s->type == SOURCE_CHILD, -EDOM);
2485 assert_return(!event_pid_changed(s->event), -ECHILD);
2486 assert_return(SIGNAL_VALID(sig), -EINVAL);
2487
2488 /* If we have already seen an indication that the process exited, refuse to send a signal early. This way
2489 * we can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2490 * available. */
2491 if (s->child.exited)
2492 return -ESRCH;
2493
2494 if (s->child.pidfd >= 0) {
2495 siginfo_t copy;
2496
2497 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2498 * structure here */
2499 if (si)
2500 copy = *si;
2501
2502 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2503 /* Let's propagate the error only if the system call is not implemented or prohibited */
2504 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2505 return -errno;
2506 } else
2507 return 0;
2508 }
2509
2510 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2511 * this here. */
2512 if (flags != 0)
2513 return -EOPNOTSUPP;
2514
2515 if (si) {
2516 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2517 siginfo_t copy = *si;
2518
2519 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2520 return -errno;
2521 } else if (kill(s->child.pid, sig) < 0)
2522 return -errno;
2523
2524 return 0;
2525 }
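
A caller-side sketch for child sources (assumptions: handler names are made up, and SIGCHLD is already blocked in all threads as sd_event_add_child(3) requires): watch a forked pid for WEXITED and terminate it on demand via sd_event_source_send_child_signal(), which routes the signal through the pidfd when one is available.

#include <errno.h>
#include <signal.h>
#include <sys/wait.h>
#include <systemd/sd-event.h>

static int child_handler(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* The child is still a zombie here; sd-event reaps it after this callback returns. */
        return sd_event_exit(sd_event_source_get_event(s), si->si_status);
}

static int watch_and_stop_child(sd_event *e, pid_t pid, sd_event_source **ret) {
        sd_event_source *s = NULL;
        int r;

        r = sd_event_add_child(e, &s, pid, WEXITED, child_handler, NULL);
        if (r < 0)
                return r;

        /* Ask the child to terminate; with a pidfd this cannot hit an unrelated, recycled PID. */
        r = sd_event_source_send_child_signal(s, SIGTERM, NULL, 0);
        if (r < 0 && r != -ESRCH) {
                sd_event_source_unref(s);
                return r;
        }

        *ret = s;
        return 0;
}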
2526
2527 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2528 assert_return(s, -EINVAL);
2529 assert_return(s->type == SOURCE_CHILD, -EDOM);
2530
2531 if (s->child.pidfd < 0)
2532 return -EOPNOTSUPP;
2533
2534 return s->child.pidfd_owned;
2535 }
2536
2537 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2538 assert_return(s, -EINVAL);
2539 assert_return(s->type == SOURCE_CHILD, -EDOM);
2540
2541 if (s->child.pidfd < 0)
2542 return -EOPNOTSUPP;
2543
2544 s->child.pidfd_owned = own;
2545 return 0;
2546 }
2547
2548 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2549 assert_return(s, -EINVAL);
2550 assert_return(s->type == SOURCE_CHILD, -EDOM);
2551
2552 return s->child.process_owned;
2553 }
2554
2555 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2556 assert_return(s, -EINVAL);
2557 assert_return(s->type == SOURCE_CHILD, -EDOM);
2558
2559 s->child.process_owned = own;
2560 return 0;
2561 }
2562
2563 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2564 assert_return(s, -EINVAL);
2565 assert_return(mask, -EINVAL);
2566 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2567 assert_return(!event_pid_changed(s->event), -ECHILD);
2568
2569 *mask = s->inotify.mask;
2570 return 0;
2571 }
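
A caller-side sketch for inotify sources (the path and handler name are made up): watch a directory for new files. The watch-descriptor and inode bookkeeping handled elsewhere in this file is internal; callers only provide a path and a mask.

#include <sys/inotify.h>
#include <systemd/sd-event.h>

static int dir_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->mask & IN_Q_OVERFLOW)
                return 0; /* events were lost; a real caller would rescan the directory */

        /* ev->name is only valid if ev->len > 0 */
        return 0;
}

static int watch_spool_dir(sd_event *e, sd_event_source **ret) {
        return sd_event_add_inotify(e, ret, "/run/example-spool",
                                    IN_CREATE|IN_MOVED_TO, dir_handler, NULL);
}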
2572
2573 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2574 int r;
2575
2576 assert_return(s, -EINVAL);
2577 assert_return(s->type != SOURCE_EXIT, -EDOM);
2578 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2579 assert_return(!event_pid_changed(s->event), -ECHILD);
2580
2581 if (s->prepare == callback)
2582 return 0;
2583
2584 if (callback && s->prepare) {
2585 s->prepare = callback;
2586 return 0;
2587 }
2588
2589 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2590 if (r < 0)
2591 return r;
2592
2593 s->prepare = callback;
2594
2595 if (callback) {
2596 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2597 if (r < 0)
2598 return r;
2599 } else
2600 prioq_remove(s->event->prepare, s, &s->prepare_index);
2601
2602 return 0;
2603 }
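
A sketch of a prepare callback (caller code; the struct and field names are hypothetical): prepare callbacks run just before the loop polls, which makes them a natural place to arm or disarm another source based on application state. Here the callback is attached to an always-enabled IO source and gates a separate defer source that flushes queued output.

#include <systemd/sd-event.h>

struct app {
        sd_event_source *flush_source; /* defer source that flushes the queue */
        unsigned n_queued;
};

static int prepare_cb(sd_event_source *s, void *userdata) {
        struct app *a = userdata;

        /* Only let the flush source fire when there is actually something queued. */
        return sd_event_source_set_enabled(a->flush_source,
                                           a->n_queued > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF);
}

/* Elsewhere: sd_event_source_set_prepare(io_source, prepare_cb); */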
2604
2605 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2606 assert_return(s, NULL);
2607
2608 return s->userdata;
2609 }
2610
2611 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2612 void *ret;
2613
2614 assert_return(s, NULL);
2615
2616 ret = s->userdata;
2617 s->userdata = userdata;
2618
2619 return ret;
2620 }
2621
2622 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2623 usec_t c;
2624 assert(e);
2625 assert(a <= b);
2626
2627 if (a <= 0)
2628 return 0;
2629 if (a >= USEC_INFINITY)
2630 return USEC_INFINITY;
2631
2632 if (b <= a + 1)
2633 return a;
2634
2635 initialize_perturb(e);
2636
2637 /*
2638 Find a good time to wake up again between times a and b. We
2639 have two goals here:
2640
2641 a) We want to wake up as seldom as possible, hence prefer
2642 later times over earlier times.
2643
2644 b) But if we have to wake up, then let's make sure to
2645 dispatch as much as possible on the entire system.
2646
2647 We implement this by waking up everywhere at the same time
2648 within any given minute if we can, synchronised via the
2649 perturbation value determined from the boot ID. If we can't,
2650 then we try to find the same spot in every 10s, then 1s and
2651 then 250ms steps. Otherwise, we pick the last possible time
2652 to wake up.
2653 */
2654
2655 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2656 if (c >= b) {
2657 if (_unlikely_(c < USEC_PER_MINUTE))
2658 return b;
2659
2660 c -= USEC_PER_MINUTE;
2661 }
2662
2663 if (c >= a)
2664 return c;
2665
2666 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2667 if (c >= b) {
2668 if (_unlikely_(c < USEC_PER_SEC*10))
2669 return b;
2670
2671 c -= USEC_PER_SEC*10;
2672 }
2673
2674 if (c >= a)
2675 return c;
2676
2677 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2678 if (c >= b) {
2679 if (_unlikely_(c < USEC_PER_SEC))
2680 return b;
2681
2682 c -= USEC_PER_SEC;
2683 }
2684
2685 if (c >= a)
2686 return c;
2687
2688 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2689 if (c >= b) {
2690 if (_unlikely_(c < USEC_PER_MSEC*250))
2691 return b;
2692
2693 c -= USEC_PER_MSEC*250;
2694 }
2695
2696 if (c >= a)
2697 return c;
2698
2699 return b;
2700 }
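
A worked example of the coalescing above (all figures illustrative): with a = 12:00:00.200, b = 12:00:59.900 and a per-boot perturbation of 0.250 s, the first candidate is the start of the minute plus the perturbation, i.e. 12:00:00.250. That already lies within [a, b], so it is returned, and every loop on this machine whose window covers that instant wakes up at the same time. Only if the window were too narrow for the per-minute grid would the 10 s, 1 s and 250 ms grids be tried, and if none of them fits, the latest admissible time b is used.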
2701
2702 static int event_arm_timer(
2703 sd_event *e,
2704 struct clock_data *d) {
2705
2706 struct itimerspec its = {};
2707 sd_event_source *a, *b;
2708 usec_t t;
2709 int r;
2710
2711 assert(e);
2712 assert(d);
2713
2714 if (!d->needs_rearm)
2715 return 0;
2716 else
2717 d->needs_rearm = false;
2718
2719 a = prioq_peek(d->earliest);
2720 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2721
2722 if (d->fd < 0)
2723 return 0;
2724
2725 if (d->next == USEC_INFINITY)
2726 return 0;
2727
2728 /* disarm */
2729 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2730 if (r < 0)
2731 return -errno;
2732
2733 d->next = USEC_INFINITY;
2734 return 0;
2735 }
2736
2737 b = prioq_peek(d->latest);
2738 assert_se(b && b->enabled != SD_EVENT_OFF);
2739
2740 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2741 if (d->next == t)
2742 return 0;
2743
2744 assert_se(d->fd >= 0);
2745
2746 if (t == 0) {
2747 /* We don't want to disarm here, just set the timer to some time long ago. */
2748 its.it_value.tv_sec = 0;
2749 its.it_value.tv_nsec = 1;
2750 } else
2751 timespec_store(&its.it_value, t);
2752
2753 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2754 if (r < 0)
2755 return -errno;
2756
2757 d->next = t;
2758 return 0;
2759 }
2760
2761 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2762 assert(e);
2763 assert(s);
2764 assert(s->type == SOURCE_IO);
2765
2766 /* If the event source was already pending, we just OR in the
2767 * new revents, otherwise we reset the value. The ORing is
2768 * necessary to handle EPOLLONESHOT events properly where
2769 * readability might happen independently of writability, and
2770 * we need to keep track of both */
2771
2772 if (s->pending)
2773 s->io.revents |= revents;
2774 else
2775 s->io.revents = revents;
2776
2777 return source_set_pending(s, true);
2778 }
2779
2780 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2781 uint64_t x;
2782 ssize_t ss;
2783
2784 assert(e);
2785 assert(fd >= 0);
2786
2787 assert_return(events == EPOLLIN, -EIO);
2788
2789 ss = read(fd, &x, sizeof(x));
2790 if (ss < 0) {
2791 if (IN_SET(errno, EAGAIN, EINTR))
2792 return 0;
2793
2794 return -errno;
2795 }
2796
2797 if (_unlikely_(ss != sizeof(x)))
2798 return -EIO;
2799
2800 if (next)
2801 *next = USEC_INFINITY;
2802
2803 return 0;
2804 }
2805
2806 static int process_timer(
2807 sd_event *e,
2808 usec_t n,
2809 struct clock_data *d) {
2810
2811 sd_event_source *s;
2812 int r;
2813
2814 assert(e);
2815 assert(d);
2816
2817 for (;;) {
2818 s = prioq_peek(d->earliest);
2819 if (!s ||
2820 s->time.next > n ||
2821 s->enabled == SD_EVENT_OFF ||
2822 s->pending)
2823 break;
2824
2825 r = source_set_pending(s, true);
2826 if (r < 0)
2827 return r;
2828
2829 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2830 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2831 d->needs_rearm = true;
2832 }
2833
2834 return 0;
2835 }
2836
2837 static int process_child(sd_event *e) {
2838 sd_event_source *s;
2839 Iterator i;
2840 int r;
2841
2842 assert(e);
2843
2844 e->need_process_child = false;
2845
2846 /*
2847 So, this is ugly. We iteratively invoke waitid() with P_PID
2848 + WNOHANG for each PID we wait for, instead of using
2849 P_ALL. This is because we only want to get child
2850 information of very specific child processes, and not all
2851 of them. We might not have processed the SIGCHLD event of a
2852 previous invocation and we don't want to maintain an
2853 unbounded *per-child* event queue, hence we really don't
2854 want anything flushed out of the kernel's queue that we
2855 don't care about. Since this is O(n) this means that if you
2856 have a lot of processes you probably want to handle SIGCHLD
2857 yourself.
2858
2859 We do not reap the children here (by using WNOWAIT), this
2860 is only done after the event source is dispatched so that
2861 the callback still sees the process as a zombie.
2862 */
2863
2864 HASHMAP_FOREACH(s, e->child_sources, i) {
2865 assert(s->type == SOURCE_CHILD);
2866
2867 if (s->pending)
2868 continue;
2869
2870 if (s->enabled == SD_EVENT_OFF)
2871 continue;
2872
2873 if (s->child.exited)
2874 continue;
2875
2876 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* Is a usable pidfd known for this event source? Then don't waitid() for it here. */
2877 continue;
2878
2879 zero(s->child.siginfo);
2880 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2881 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2882 if (r < 0)
2883 return -errno;
2884
2885 if (s->child.siginfo.si_pid != 0) {
2886 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2887
2888 if (zombie)
2889 s->child.exited = true;
2890
2891 if (!zombie && (s->child.options & WEXITED)) {
2892 /* If the child isn't dead then let's
2893 * immediately remove the state change
2894 * from the queue, since there's no
2895 * benefit in leaving it queued */
2896
2897 assert(s->child.options & (WSTOPPED|WCONTINUED));
2898 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2899 }
2900
2901 r = source_set_pending(s, true);
2902 if (r < 0)
2903 return r;
2904 }
2905 }
2906
2907 return 0;
2908 }
2909
2910 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2911 assert(e);
2912 assert(s);
2913 assert(s->type == SOURCE_CHILD);
2914
2915 if (s->pending)
2916 return 0;
2917
2918 if (s->enabled == SD_EVENT_OFF)
2919 return 0;
2920
2921 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2922 return 0;
2923
2924 zero(s->child.siginfo);
2925 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2926 return -errno;
2927
2928 if (s->child.siginfo.si_pid == 0)
2929 return 0;
2930
2931 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2932 s->child.exited = true;
2933
2934 return source_set_pending(s, true);
2935 }
2936
2937 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2938 bool read_one = false;
2939 int r;
2940
2941 assert(e);
2942 assert(d);
2943 assert_return(events == EPOLLIN, -EIO);
2944
2945 /* If there's a signal queued on this priority and SIGCHLD is
2946 on this priority too, then make sure to recheck the
2947 children we watch. This is because we only ever dequeue
2948 the first signal per priority, and if we dequeue one, a
2949 SIGCHLD might still be queued behind it and we wouldn't know,
2950 but we might have higher-priority children we care about,
2951 hence we need to check that explicitly. */
2952
2953 if (sigismember(&d->sigset, SIGCHLD))
2954 e->need_process_child = true;
2955
2956 /* If there's already an event source pending for this
2957 * priority we don't read another */
2958 if (d->current)
2959 return 0;
2960
2961 for (;;) {
2962 struct signalfd_siginfo si;
2963 ssize_t n;
2964 sd_event_source *s = NULL;
2965
2966 n = read(d->fd, &si, sizeof(si));
2967 if (n < 0) {
2968 if (IN_SET(errno, EAGAIN, EINTR))
2969 return read_one;
2970
2971 return -errno;
2972 }
2973
2974 if (_unlikely_(n != sizeof(si)))
2975 return -EIO;
2976
2977 assert(SIGNAL_VALID(si.ssi_signo));
2978
2979 read_one = true;
2980
2981 if (e->signal_sources)
2982 s = e->signal_sources[si.ssi_signo];
2983 if (!s)
2984 continue;
2985 if (s->pending)
2986 continue;
2987
2988 s->signal.siginfo = si;
2989 d->current = s;
2990
2991 r = source_set_pending(s, true);
2992 if (r < 0)
2993 return r;
2994
2995 return 1;
2996 }
2997 }
2998
2999 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3000 ssize_t n;
3001
3002 assert(e);
3003 assert(d);
3004
3005 assert_return(revents == EPOLLIN, -EIO);
3006
3007 /* If there's already an event source pending for this priority, don't read another */
3008 if (d->n_pending > 0)
3009 return 0;
3010
3011 /* Is the read buffer non-empty? If so, let's not read more */
3012 if (d->buffer_filled > 0)
3013 return 0;
3014
3015 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3016 if (n < 0) {
3017 if (IN_SET(errno, EAGAIN, EINTR))
3018 return 0;
3019
3020 return -errno;
3021 }
3022
3023 assert(n > 0);
3024 d->buffer_filled = (size_t) n;
3025 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3026
3027 return 1;
3028 }
3029
3030 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3031 assert(e);
3032 assert(d);
3033 assert(sz <= d->buffer_filled);
3034
3035 if (sz == 0)
3036 return;
3037
3038 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3039 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3040 d->buffer_filled -= sz;
3041
3042 if (d->buffer_filled == 0)
3043 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3044 }
3045
3046 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3047 int r;
3048
3049 assert(e);
3050 assert(d);
3051
3052 /* If there's already an event source pending for this priority, don't read another */
3053 if (d->n_pending > 0)
3054 return 0;
3055
3056 while (d->buffer_filled > 0) {
3057 size_t sz;
3058
3059 /* Let's validate that the event structures are complete */
3060 if (d->buffer_filled < offsetof(struct inotify_event, name))
3061 return -EIO;
3062
3063 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3064 if (d->buffer_filled < sz)
3065 return -EIO;
3066
3067 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3068 struct inode_data *inode_data;
3069 Iterator i;
3070
3071 /* The queue overran, let's pass this event to all event sources connected to this inotify
3072 * object */
3073
3074 HASHMAP_FOREACH(inode_data, d->inodes, i) {
3075 sd_event_source *s;
3076
3077 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3078
3079 if (s->enabled == SD_EVENT_OFF)
3080 continue;
3081
3082 r = source_set_pending(s, true);
3083 if (r < 0)
3084 return r;
3085 }
3086 }
3087 } else {
3088 struct inode_data *inode_data;
3089 sd_event_source *s;
3090
3091 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3092 * our watch descriptor table. */
3093 if (d->buffer.ev.mask & IN_IGNORED) {
3094
3095 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3096 if (!inode_data) {
3097 event_inotify_data_drop(e, d, sz);
3098 continue;
3099 }
3100
3101 /* The watch descriptor was removed by the kernel, let's drop it here too */
3102 inode_data->wd = -1;
3103 } else {
3104 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3105 if (!inode_data) {
3106 event_inotify_data_drop(e, d, sz);
3107 continue;
3108 }
3109 }
3110
3111 /* Trigger all event sources that are interested in these events. Also trigger all event
3112 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3113 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3114
3115 if (s->enabled == SD_EVENT_OFF)
3116 continue;
3117
3118 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3119 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3120 continue;
3121
3122 r = source_set_pending(s, true);
3123 if (r < 0)
3124 return r;
3125 }
3126 }
3127
3128 /* Something pending now? If so, let's finish, otherwise let's read more. */
3129 if (d->n_pending > 0)
3130 return 1;
3131 }
3132
3133 return 0;
3134 }
3135
3136 static int process_inotify(sd_event *e) {
3137 struct inotify_data *d;
3138 int r, done = 0;
3139
3140 assert(e);
3141
3142 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3143 r = event_inotify_data_process(e, d);
3144 if (r < 0)
3145 return r;
3146 if (r > 0)
3147 done++;
3148 }
3149
3150 return done;
3151 }
3152
3153 static int source_dispatch(sd_event_source *s) {
3154 EventSourceType saved_type;
3155 int r = 0;
3156
3157 assert(s);
3158 assert(s->pending || s->type == SOURCE_EXIT);
3159
3160 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3161 * the event. */
3162 saved_type = s->type;
3163
3164 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3165 r = source_set_pending(s, false);
3166 if (r < 0)
3167 return r;
3168 }
3169
3170 if (s->type != SOURCE_POST) {
3171 sd_event_source *z;
3172 Iterator i;
3173
3174 /* If we execute a non-post source, let's mark all
3175 * post sources as pending */
3176
3177 SET_FOREACH(z, s->event->post_sources, i) {
3178 if (z->enabled == SD_EVENT_OFF)
3179 continue;
3180
3181 r = source_set_pending(z, true);
3182 if (r < 0)
3183 return r;
3184 }
3185 }
3186
3187 if (s->enabled == SD_EVENT_ONESHOT) {
3188 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3189 if (r < 0)
3190 return r;
3191 }
3192
3193 s->dispatching = true;
3194
3195 switch (s->type) {
3196
3197 case SOURCE_IO:
3198 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3199 break;
3200
3201 case SOURCE_TIME_REALTIME:
3202 case SOURCE_TIME_BOOTTIME:
3203 case SOURCE_TIME_MONOTONIC:
3204 case SOURCE_TIME_REALTIME_ALARM:
3205 case SOURCE_TIME_BOOTTIME_ALARM:
3206 r = s->time.callback(s, s->time.next, s->userdata);
3207 break;
3208
3209 case SOURCE_SIGNAL:
3210 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3211 break;
3212
3213 case SOURCE_CHILD: {
3214 bool zombie;
3215
3216 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3217
3218 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3219
3220 /* Now, reap the PID for good. */
3221 if (zombie) {
3222 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3223 s->child.waited = true;
3224 }
3225
3226 break;
3227 }
3228
3229 case SOURCE_DEFER:
3230 r = s->defer.callback(s, s->userdata);
3231 break;
3232
3233 case SOURCE_POST:
3234 r = s->post.callback(s, s->userdata);
3235 break;
3236
3237 case SOURCE_EXIT:
3238 r = s->exit.callback(s, s->userdata);
3239 break;
3240
3241 case SOURCE_INOTIFY: {
3242 struct sd_event *e = s->event;
3243 struct inotify_data *d;
3244 size_t sz;
3245
3246 assert(s->inotify.inode_data);
3247 assert_se(d = s->inotify.inode_data->inotify_data);
3248
3249 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3250 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3251 assert(d->buffer_filled >= sz);
3252
3253 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3254
3255 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3256 * buffer. */
3257 if (d->n_pending == 0)
3258 event_inotify_data_drop(e, d, sz);
3259
3260 break;
3261 }
3262
3263 case SOURCE_WATCHDOG:
3264 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3265 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3266 assert_not_reached("Wut? I shouldn't exist.");
3267 }
3268
3269 s->dispatching = false;
3270
3271 if (r < 0)
3272 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3273 strna(s->description), event_source_type_to_string(saved_type));
3274
3275 if (s->n_ref == 0)
3276 source_free(s);
3277 else if (r < 0)
3278 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3279
3280 return 1;
3281 }
3282
3283 static int event_prepare(sd_event *e) {
3284 int r;
3285
3286 assert(e);
3287
3288 for (;;) {
3289 sd_event_source *s;
3290
3291 s = prioq_peek(e->prepare);
3292 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3293 break;
3294
3295 s->prepare_iteration = e->iteration;
3296 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3297 if (r < 0)
3298 return r;
3299
3300 assert(s->prepare);
3301
3302 s->dispatching = true;
3303 r = s->prepare(s, s->userdata);
3304 s->dispatching = false;
3305
3306 if (r < 0)
3307 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3308 strna(s->description), event_source_type_to_string(s->type));
3309
3310 if (s->n_ref == 0)
3311 source_free(s);
3312 else if (r < 0)
3313 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3314 }
3315
3316 return 0;
3317 }
3318
3319 static int dispatch_exit(sd_event *e) {
3320 sd_event_source *p;
3321 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3322 int r;
3323
3324 assert(e);
3325
3326 p = prioq_peek(e->exit);
3327 if (!p || p->enabled == SD_EVENT_OFF) {
3328 e->state = SD_EVENT_FINISHED;
3329 return 0;
3330 }
3331
3332 ref = sd_event_ref(e);
3333 e->iteration++;
3334 e->state = SD_EVENT_EXITING;
3335 r = source_dispatch(p);
3336 e->state = SD_EVENT_INITIAL;
3337 return r;
3338 }
3339
3340 static sd_event_source* event_next_pending(sd_event *e) {
3341 sd_event_source *p;
3342
3343 assert(e);
3344
3345 p = prioq_peek(e->pending);
3346 if (!p)
3347 return NULL;
3348
3349 if (p->enabled == SD_EVENT_OFF)
3350 return NULL;
3351
3352 return p;
3353 }
3354
3355 static int arm_watchdog(sd_event *e) {
3356 struct itimerspec its = {};
3357 usec_t t;
3358 int r;
3359
3360 assert(e);
3361 assert(e->watchdog_fd >= 0);
3362
3363 t = sleep_between(e,
3364 e->watchdog_last + (e->watchdog_period / 2),
3365 e->watchdog_last + (e->watchdog_period * 3 / 4));
3366
3367 timespec_store(&its.it_value, t);
3368
3369 /* Make sure we never set the watchdog to 0, which tells the
3370 * kernel to disable it. */
3371 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3372 its.it_value.tv_nsec = 1;
3373
3374 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3375 if (r < 0)
3376 return -errno;
3377
3378 return 0;
3379 }
3380
3381 static int process_watchdog(sd_event *e) {
3382 assert(e);
3383
3384 if (!e->watchdog)
3385 return 0;
3386
3387 /* Don't notify watchdog too often */
3388 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3389 return 0;
3390
3391 sd_notify(false, "WATCHDOG=1");
3392 e->watchdog_last = e->timestamp.monotonic;
3393
3394 return arm_watchdog(e);
3395 }
3396
3397 static void event_close_inode_data_fds(sd_event *e) {
3398 struct inode_data *d;
3399
3400 assert(e);
3401
3402 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3403 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3404 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3405 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3406 * compromise. */
3407
3408 while ((d = e->inode_data_to_close)) {
3409 assert(d->fd >= 0);
3410 d->fd = safe_close(d->fd);
3411
3412 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3413 }
3414 }
3415
3416 _public_ int sd_event_prepare(sd_event *e) {
3417 int r;
3418
3419 assert_return(e, -EINVAL);
3420 assert_return(e = event_resolve(e), -ENOPKG);
3421 assert_return(!event_pid_changed(e), -ECHILD);
3422 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3423 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3424
3425 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3426 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3427 * syscalls */
3428 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3429
3430 if (e->exit_requested)
3431 goto pending;
3432
3433 e->iteration++;
3434
3435 e->state = SD_EVENT_PREPARING;
3436 r = event_prepare(e);
3437 e->state = SD_EVENT_INITIAL;
3438 if (r < 0)
3439 return r;
3440
3441 r = event_arm_timer(e, &e->realtime);
3442 if (r < 0)
3443 return r;
3444
3445 r = event_arm_timer(e, &e->boottime);
3446 if (r < 0)
3447 return r;
3448
3449 r = event_arm_timer(e, &e->monotonic);
3450 if (r < 0)
3451 return r;
3452
3453 r = event_arm_timer(e, &e->realtime_alarm);
3454 if (r < 0)
3455 return r;
3456
3457 r = event_arm_timer(e, &e->boottime_alarm);
3458 if (r < 0)
3459 return r;
3460
3461 event_close_inode_data_fds(e);
3462
3463 if (event_next_pending(e) || e->need_process_child)
3464 goto pending;
3465
3466 e->state = SD_EVENT_ARMED;
3467
3468 return 0;
3469
3470 pending:
3471 e->state = SD_EVENT_ARMED;
3472 r = sd_event_wait(e, 0);
3473 if (r == 0)
3474 e->state = SD_EVENT_ARMED;
3475
3476 return r;
3477 }
3478
3479 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3480 struct epoll_event *ev_queue;
3481 unsigned ev_queue_max;
3482 int r, m, i;
3483
3484 assert_return(e, -EINVAL);
3485 assert_return(e = event_resolve(e), -ENOPKG);
3486 assert_return(!event_pid_changed(e), -ECHILD);
3487 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3488 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3489
3490 if (e->exit_requested) {
3491 e->state = SD_EVENT_PENDING;
3492 return 1;
3493 }
3494
3495 ev_queue_max = MAX(e->n_sources, 1u);
3496 ev_queue = newa(struct epoll_event, ev_queue_max);
3497
3498 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3499 if (e->inotify_data_buffered)
3500 timeout = 0;
3501
3502 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3503 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3504 if (m < 0) {
3505 if (errno == EINTR) {
3506 e->state = SD_EVENT_PENDING;
3507 return 1;
3508 }
3509
3510 r = -errno;
3511 goto finish;
3512 }
3513
3514 triple_timestamp_get(&e->timestamp);
3515
3516 for (i = 0; i < m; i++) {
3517
3518 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3519 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3520 else {
3521 WakeupType *t = ev_queue[i].data.ptr;
3522
3523 switch (*t) {
3524
3525 case WAKEUP_EVENT_SOURCE: {
3526 sd_event_source *s = ev_queue[i].data.ptr;
3527
3528 assert(s);
3529
3530 switch (s->type) {
3531
3532 case SOURCE_IO:
3533 r = process_io(e, s, ev_queue[i].events);
3534 break;
3535
3536 case SOURCE_CHILD:
3537 r = process_pidfd(e, s, ev_queue[i].events);
3538 break;
3539
3540 default:
3541 assert_not_reached("Unexpected event source type");
3542 }
3543
3544 break;
3545 }
3546
3547 case WAKEUP_CLOCK_DATA: {
3548 struct clock_data *d = ev_queue[i].data.ptr;
3549
3550 assert(d);
3551
3552 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3553 break;
3554 }
3555
3556 case WAKEUP_SIGNAL_DATA:
3557 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3558 break;
3559
3560 case WAKEUP_INOTIFY_DATA:
3561 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3562 break;
3563
3564 default:
3565 assert_not_reached("Invalid wake-up pointer");
3566 }
3567 }
3568 if (r < 0)
3569 goto finish;
3570 }
3571
3572 r = process_watchdog(e);
3573 if (r < 0)
3574 goto finish;
3575
3576 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3577 if (r < 0)
3578 goto finish;
3579
3580 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3581 if (r < 0)
3582 goto finish;
3583
3584 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3585 if (r < 0)
3586 goto finish;
3587
3588 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3589 if (r < 0)
3590 goto finish;
3591
3592 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3593 if (r < 0)
3594 goto finish;
3595
3596 if (e->need_process_child) {
3597 r = process_child(e);
3598 if (r < 0)
3599 goto finish;
3600 }
3601
3602 r = process_inotify(e);
3603 if (r < 0)
3604 goto finish;
3605
3606 if (event_next_pending(e)) {
3607 e->state = SD_EVENT_PENDING;
3608
3609 return 1;
3610 }
3611
3612 r = 0;
3613
3614 finish:
3615 e->state = SD_EVENT_INITIAL;
3616
3617 return r;
3618 }
3619
3620 _public_ int sd_event_dispatch(sd_event *e) {
3621 sd_event_source *p;
3622 int r;
3623
3624 assert_return(e, -EINVAL);
3625 assert_return(e = event_resolve(e), -ENOPKG);
3626 assert_return(!event_pid_changed(e), -ECHILD);
3627 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3628 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3629
3630 if (e->exit_requested)
3631 return dispatch_exit(e);
3632
3633 p = event_next_pending(e);
3634 if (p) {
3635 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3636
3637 ref = sd_event_ref(e);
3638 e->state = SD_EVENT_RUNNING;
3639 r = source_dispatch(p);
3640 e->state = SD_EVENT_INITIAL;
3641 return r;
3642 }
3643
3644 e->state = SD_EVENT_INITIAL;
3645
3646 return 1;
3647 }
3648
3649 static void event_log_delays(sd_event *e) {
3650 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3651 size_t l, i;
3652
3653 p = b;
3654 l = sizeof(b);
3655 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3656 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3657 e->delays[i] = 0;
3658 }
3659 log_debug("Event loop iterations: %s", b);
3660 }
3661
3662 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3663 int r;
3664
3665 assert_return(e, -EINVAL);
3666 assert_return(e = event_resolve(e), -ENOPKG);
3667 assert_return(!event_pid_changed(e), -ECHILD);
3668 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3669 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3670
3671 if (e->profile_delays && e->last_run) {
3672 usec_t this_run;
3673 unsigned l;
3674
3675 this_run = now(CLOCK_MONOTONIC);
3676
3677 l = u64log2(this_run - e->last_run);
3678 assert(l < sizeof(e->delays));
3679 e->delays[l]++;
3680
3681 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3682 event_log_delays(e);
3683 e->last_log = this_run;
3684 }
3685 }
3686
3687 r = sd_event_prepare(e);
3688 if (r == 0)
3689 /* There was nothing? Then wait... */
3690 r = sd_event_wait(e, timeout);
3691
3692 if (e->profile_delays)
3693 e->last_run = now(CLOCK_MONOTONIC);
3694
3695 if (r > 0) {
3696 /* There's something now, then let's dispatch it */
3697 r = sd_event_dispatch(e);
3698 if (r < 0)
3699 return r;
3700
3701 return 1;
3702 }
3703
3704 return r;
3705 }
3706
3707 _public_ int sd_event_loop(sd_event *e) {
3708 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3709 int r;
3710
3711 assert_return(e, -EINVAL);
3712 assert_return(e = event_resolve(e), -ENOPKG);
3713 assert_return(!event_pid_changed(e), -ECHILD);
3714 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3715
3716 ref = sd_event_ref(e);
3717
3718 while (e->state != SD_EVENT_FINISHED) {
3719 r = sd_event_run(e, (uint64_t) -1);
3720 if (r < 0)
3721 return r;
3722 }
3723
3724 return e->exit_code;
3725 }
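
A minimal complete program built on the public entry points above (a sketch; the actual service logic is omitted): take the default loop, hook SIGTERM so that sd_event_exit() ends the loop cleanly, and run until finished.

#include <signal.h>
#include <sys/signalfd.h>
#include <systemd/sd-event.h>

static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        sd_event *e = NULL;
        sigset_t mask;
        int r;

        /* Signals handled via sd_event_add_signal() must be blocked first. */
        sigemptyset(&mask);
        sigaddset(&mask, SIGTERM);
        sigprocmask(SIG_BLOCK, &mask, NULL);

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
        if (r >= 0) {
                /* ... add IO/timer/child sources here ... */
                r = sd_event_loop(e); /* returns the code passed to sd_event_exit() */
        }

        sd_event_unref(e);
        return r < 0 ? 1 : 0;
}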
3726
3727 _public_ int sd_event_get_fd(sd_event *e) {
3728
3729 assert_return(e, -EINVAL);
3730 assert_return(e = event_resolve(e), -ENOPKG);
3731 assert_return(!event_pid_changed(e), -ECHILD);
3732
3733 return e->epoll_fd;
3734 }
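
A sketch of embedding this loop into a foreign poll loop, following the prepare/wait/dispatch split implemented above (a simplified version of the pattern in the sd_event_get_fd(3) man page): the epoll fd is polled externally, and sd_event_wait() is then called with a timeout of 0.

#include <errno.h>
#include <poll.h>
#include <systemd/sd-event.h>

static int drive_once(sd_event *e) {
        int r;

        r = sd_event_prepare(e);
        if (r < 0)
                return r;

        if (r == 0) {
                struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };

                /* Block in the foreign loop; here a plain poll() stands in for it. */
                if (poll(&p, 1, -1) < 0)
                        return -errno;

                r = sd_event_wait(e, 0);
                if (r < 0)
                        return r;
        }

        if (r > 0)
                return sd_event_dispatch(e);

        return 0;
}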
3735
3736 _public_ int sd_event_get_state(sd_event *e) {
3737 assert_return(e, -EINVAL);
3738 assert_return(e = event_resolve(e), -ENOPKG);
3739 assert_return(!event_pid_changed(e), -ECHILD);
3740
3741 return e->state;
3742 }
3743
3744 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3745 assert_return(e, -EINVAL);
3746 assert_return(e = event_resolve(e), -ENOPKG);
3747 assert_return(code, -EINVAL);
3748 assert_return(!event_pid_changed(e), -ECHILD);
3749
3750 if (!e->exit_requested)
3751 return -ENODATA;
3752
3753 *code = e->exit_code;
3754 return 0;
3755 }
3756
3757 _public_ int sd_event_exit(sd_event *e, int code) {
3758 assert_return(e, -EINVAL);
3759 assert_return(e = event_resolve(e), -ENOPKG);
3760 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3761 assert_return(!event_pid_changed(e), -ECHILD);
3762
3763 e->exit_requested = true;
3764 e->exit_code = code;
3765
3766 return 0;
3767 }
3768
3769 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3770 assert_return(e, -EINVAL);
3771 assert_return(e = event_resolve(e), -ENOPKG);
3772 assert_return(usec, -EINVAL);
3773 assert_return(!event_pid_changed(e), -ECHILD);
3774
3775 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3776 return -EOPNOTSUPP;
3777
3778 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3779 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3780 * the purpose of getting the time this doesn't matter. */
3781 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3782 return -EOPNOTSUPP;
3783
3784 if (!triple_timestamp_is_set(&e->timestamp)) {
3785 /* Implicitly fall back to now() if we never ran
3786 * before and thus have no cached time. */
3787 *usec = now(clock);
3788 return 1;
3789 }
3790
3791 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3792 return 0;
3793 }
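
A caller-side sketch for sd_event_now(): compute an absolute deadline relative to the loop's cached timestamp when adding a timer, so that all sources scheduled within one iteration share the same notion of "now". The 30 s figure and the handler name are arbitrary.

#include <time.h>
#include <systemd/sd-event.h>

static int deadline_handler(sd_event_source *s, uint64_t usec, void *userdata) {
        /* Deadline hit, stop the loop. */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int add_deadline(sd_event *e, sd_event_source **ret) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        return sd_event_add_time(e, ret, CLOCK_MONOTONIC,
                                 now_usec + 30ULL * 1000000ULL, /* 30s from "now" */
                                 0,                             /* 0 = default accuracy */
                                 deadline_handler, NULL);
}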
3794
3795 _public_ int sd_event_default(sd_event **ret) {
3796 sd_event *e = NULL;
3797 int r;
3798
3799 if (!ret)
3800 return !!default_event;
3801
3802 if (default_event) {
3803 *ret = sd_event_ref(default_event);
3804 return 0;
3805 }
3806
3807 r = sd_event_new(&e);
3808 if (r < 0)
3809 return r;
3810
3811 e->default_event_ptr = &default_event;
3812 e->tid = gettid();
3813 default_event = e;
3814
3815 *ret = e;
3816 return 1;
3817 }
3818
3819 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3820 assert_return(e, -EINVAL);
3821 assert_return(e = event_resolve(e), -ENOPKG);
3822 assert_return(tid, -EINVAL);
3823 assert_return(!event_pid_changed(e), -ECHILD);
3824
3825 if (e->tid != 0) {
3826 *tid = e->tid;
3827 return 0;
3828 }
3829
3830 return -ENXIO;
3831 }
3832
3833 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3834 int r;
3835
3836 assert_return(e, -EINVAL);
3837 assert_return(e = event_resolve(e), -ENOPKG);
3838 assert_return(!event_pid_changed(e), -ECHILD);
3839
3840 if (e->watchdog == !!b)
3841 return e->watchdog;
3842
3843 if (b) {
3844 struct epoll_event ev;
3845
3846 r = sd_watchdog_enabled(false, &e->watchdog_period);
3847 if (r <= 0)
3848 return r;
3849
3850 /* Issue first ping immediately */
3851 sd_notify(false, "WATCHDOG=1");
3852 e->watchdog_last = now(CLOCK_MONOTONIC);
3853
3854 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3855 if (e->watchdog_fd < 0)
3856 return -errno;
3857
3858 r = arm_watchdog(e);
3859 if (r < 0)
3860 goto fail;
3861
3862 ev = (struct epoll_event) {
3863 .events = EPOLLIN,
3864 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3865 };
3866
3867 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3868 if (r < 0) {
3869 r = -errno;
3870 goto fail;
3871 }
3872
3873 } else {
3874 if (e->watchdog_fd >= 0) {
3875 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3876 e->watchdog_fd = safe_close(e->watchdog_fd);
3877 }
3878 }
3879
3880 e->watchdog = !!b;
3881 return e->watchdog;
3882
3883 fail:
3884 e->watchdog_fd = safe_close(e->watchdog_fd);
3885 return r;
3886 }
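
A caller-side sketch: a service with WatchdogSec= set in its unit file only needs the call below; as implemented above, the loop then picks the interval up via sd_watchdog_enabled() and pings the service manager from its own timerfd.

#include <stdbool.h>
#include <systemd/sd-event.h>

static int setup_loop(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* Safe to call unconditionally: a no-op when no watchdog is configured for this service. */
        r = sd_event_set_watchdog(e, true);
        if (r < 0) {
                sd_event_unref(e);
                return r;
        }

        *ret = e;
        return 0;
}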
3887
3888 _public_ int sd_event_get_watchdog(sd_event *e) {
3889 assert_return(e, -EINVAL);
3890 assert_return(e = event_resolve(e), -ENOPKG);
3891 assert_return(!event_pid_changed(e), -ECHILD);
3892
3893 return e->watchdog;
3894 }
3895
3896 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3897 assert_return(e, -EINVAL);
3898 assert_return(e = event_resolve(e), -ENOPKG);
3899 assert_return(!event_pid_changed(e), -ECHILD);
3900
3901 *ret = e->iteration;
3902 return 0;
3903 }
3904
3905 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3906 assert_return(s, -EINVAL);
3907
3908 s->destroy_callback = callback;
3909 return 0;
3910 }
3911
3912 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3913 assert_return(s, -EINVAL);
3914
3915 if (ret)
3916 *ret = s->destroy_callback;
3917
3918 return !!s->destroy_callback;
3919 }
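
A caller-side sketch for destroy callbacks (struct and field names hypothetical): let the event source own its userdata, so a heap-allocated context is freed automatically when the last reference to the source is dropped, whichever code path that happens on.

#include <stdlib.h>
#include <systemd/sd-event.h>

struct ctx {
        char *name;
};

static void ctx_destroy(void *userdata) {
        struct ctx *c = userdata;

        if (!c)
                return;

        free(c->name);
        free(c);
}

/* After creating a source with 'c' as userdata:
 *     r = sd_event_source_set_destroy_callback(s, ctx_destroy);
 * 'c' is then freed together with the source. */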
3920
3921 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3922 assert_return(s, -EINVAL);
3923
3924 return s->floating;
3925 }
3926
3927 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3928 assert_return(s, -EINVAL);
3929
3930 if (s->floating == !!b)
3931 return 0;
3932
3933 if (!s->event) /* Already disconnected */
3934 return -ESTALE;
3935
3936 s->floating = b;
3937
3938 if (b) {
3939 sd_event_source_ref(s);
3940 sd_event_unref(s->event);
3941 } else {
3942 sd_event_ref(s->event);
3943 sd_event_source_unref(s);
3944 }
3945
3946 return 1;
3947 }
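
A caller-side sketch of a "fire and forget" source using the floating flag: after sd_event_source_set_floating(s, true), the reference swap above means the loop keeps the source alive, so the caller can drop its own reference immediately.

#include <stdbool.h>
#include <systemd/sd-event.h>

static int oneshot_cb(sd_event_source *s, void *userdata) {
        /* ... one-time work ... */
        return 0;
}

static int add_fire_and_forget(sd_event *e) {
        sd_event_source *s = NULL;
        int r;

        r = sd_event_add_defer(e, &s, oneshot_cb, NULL);
        if (r < 0)
                return r;

        /* Hand ownership to the loop, then drop our reference. */
        r = sd_event_source_set_floating(s, true);
        sd_event_source_unref(s);

        return r < 0 ? r : 0;
}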