src/libsystemd/sd-event/sd-event.c (thirdparty/systemd.git; commit subject: "sd-event: don't allocate event queue array on stack")
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source that can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
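/* A minimal standalone sketch (not part of this file) of the mechanism the helper above gates: polling a
 * pidfd for EPOLLIN, which the kernel raises once the process terminates. It assumes Linux >= 5.3 and
 * invokes pidfd_open() via syscall() in case the libc lacks a wrapper; the helper name is hypothetical
 * and error handling is abbreviated.
 *
 *     #include <signal.h>
 *     #include <sys/epoll.h>
 *     #include <sys/syscall.h>
 *     #include <sys/types.h>
 *     #include <sys/wait.h>
 *     #include <unistd.h>
 *
 *     int wait_for_exit_via_pidfd(pid_t pid) {
 *             int pidfd = (int) syscall(SYS_pidfd_open, pid, 0);
 *             if (pidfd < 0)
 *                     return -1;
 *
 *             int epfd = epoll_create1(EPOLL_CLOEXEC);
 *             struct epoll_event ev = { .events = EPOLLIN, .data.fd = pidfd };
 *             epoll_ctl(epfd, EPOLL_CTL_ADD, pidfd, &ev);
 *
 *             struct epoll_event got;
 *             epoll_wait(epfd, &got, 1, -1);      // EPOLLIN fires when the child terminates
 *
 *             siginfo_t si;
 *             waitid(P_PID, pid, &si, WEXITED);   // reap it
 *
 *             close(epfd);
 *             close(pidfd);
 *             return 0;
 *     }
 */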
39
40 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43 [SOURCE_TIME_BOOTTIME] = "boottime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
53 [SOURCE_INOTIFY] = "inotify",
54 };
55
56 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
60 struct sd_event {
61 unsigned n_ref;
62
63 int epoll_fd;
64 int watchdog_fd;
65
66 Prioq *pending;
67 Prioq *prepare;
68
69 /* timerfd_create() only supports these five clocks so far. We
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
73 struct clock_data boottime;
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
77
78 usec_t perturb;
79
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
82
83 Hashmap *child_sources;
84 unsigned n_enabled_child_sources;
85
86 Set *post_sources;
87
88 Prioq *exit;
89
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
98 pid_t original_pid;
99
100 uint64_t iteration;
101 triple_timestamp timestamp;
102 int state;
103
104 bool exit_requested:1;
105 bool need_process_child:1;
106 bool watchdog:1;
107 bool profile_delays:1;
108
109 int exit_code;
110
111 pid_t tid;
112 sd_event **default_event_ptr;
113
114 usec_t watchdog_last, watchdog_period;
115
116 unsigned n_sources;
117
118 struct epoll_event *event_queue;
119 size_t event_queue_allocated;
120
121 LIST_HEAD(sd_event_source, sources);
122
123 usec_t last_run, last_log;
124 unsigned delays[sizeof(usec_t) * 8];
125 };
126
127 static thread_local sd_event *default_event = NULL;
128
129 static void source_disconnect(sd_event_source *s);
130 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
131
132 static sd_event *event_resolve(sd_event *e) {
133 return e == SD_EVENT_DEFAULT ? default_event : e;
134 }
135
136 static int pending_prioq_compare(const void *a, const void *b) {
137 const sd_event_source *x = a, *y = b;
138 int r;
139
140 assert(x->pending);
141 assert(y->pending);
142
143 /* Enabled ones first */
144 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
145 return -1;
146 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
147 return 1;
148
149 /* Lower priority values first */
150 r = CMP(x->priority, y->priority);
151 if (r != 0)
152 return r;
153
154 /* Older entries first */
155 return CMP(x->pending_iteration, y->pending_iteration);
156 }
157
158 static int prepare_prioq_compare(const void *a, const void *b) {
159 const sd_event_source *x = a, *y = b;
160 int r;
161
162 assert(x->prepare);
163 assert(y->prepare);
164
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
167 return -1;
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
169 return 1;
170
171 /* Move most recently prepared ones last, so that we can stop
172 * preparing as soon as we hit one that has already been
173 * prepared in the current iteration */
174 r = CMP(x->prepare_iteration, y->prepare_iteration);
175 if (r != 0)
176 return r;
177
178 /* Lower priority values first */
179 return CMP(x->priority, y->priority);
180 }
181
182 static int earliest_time_prioq_compare(const void *a, const void *b) {
183 const sd_event_source *x = a, *y = b;
184
185 assert(EVENT_SOURCE_IS_TIME(x->type));
186 assert(x->type == y->type);
187
188 /* Enabled ones first */
189 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190 return -1;
191 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192 return 1;
193
194 /* Move the pending ones to the end */
195 if (!x->pending && y->pending)
196 return -1;
197 if (x->pending && !y->pending)
198 return 1;
199
200 /* Order by time */
201 return CMP(x->time.next, y->time.next);
202 }
203
204 static usec_t time_event_source_latest(const sd_event_source *s) {
205 return usec_add(s->time.next, s->time.accuracy);
206 }
207
208 static int latest_time_prioq_compare(const void *a, const void *b) {
209 const sd_event_source *x = a, *y = b;
210
211 assert(EVENT_SOURCE_IS_TIME(x->type));
212 assert(x->type == y->type);
213
214 /* Enabled ones first */
215 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
216 return -1;
217 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
218 return 1;
219
220 /* Move the pending ones to the end */
221 if (!x->pending && y->pending)
222 return -1;
223 if (x->pending && !y->pending)
224 return 1;
225
226 /* Order by time */
227 return CMP(time_event_source_latest(x), time_event_source_latest(y));
228 }
229
230 static int exit_prioq_compare(const void *a, const void *b) {
231 const sd_event_source *x = a, *y = b;
232
233 assert(x->type == SOURCE_EXIT);
234 assert(y->type == SOURCE_EXIT);
235
236 /* Enabled ones first */
237 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
238 return -1;
239 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
240 return 1;
241
242 /* Lower priority values first */
243 return CMP(x->priority, y->priority);
244 }
245
246 static void free_clock_data(struct clock_data *d) {
247 assert(d);
248 assert(d->wakeup == WAKEUP_CLOCK_DATA);
249
250 safe_close(d->fd);
251 prioq_free(d->earliest);
252 prioq_free(d->latest);
253 }
254
255 static sd_event *event_free(sd_event *e) {
256 sd_event_source *s;
257
258 assert(e);
259
260 while ((s = e->sources)) {
261 assert(s->floating);
262 source_disconnect(s);
263 sd_event_source_unref(s);
264 }
265
266 assert(e->n_sources == 0);
267
268 if (e->default_event_ptr)
269 *(e->default_event_ptr) = NULL;
270
271 safe_close(e->epoll_fd);
272 safe_close(e->watchdog_fd);
273
274 free_clock_data(&e->realtime);
275 free_clock_data(&e->boottime);
276 free_clock_data(&e->monotonic);
277 free_clock_data(&e->realtime_alarm);
278 free_clock_data(&e->boottime_alarm);
279
280 prioq_free(e->pending);
281 prioq_free(e->prepare);
282 prioq_free(e->exit);
283
284 free(e->signal_sources);
285 hashmap_free(e->signal_data);
286
287 hashmap_free(e->inotify_data);
288
289 hashmap_free(e->child_sources);
290 set_free(e->post_sources);
291
292 free(e->event_queue);
293
294 return mfree(e);
295 }
296
297 _public_ int sd_event_new(sd_event** ret) {
298 sd_event *e;
299 int r;
300
301 assert_return(ret, -EINVAL);
302
303 e = new(sd_event, 1);
304 if (!e)
305 return -ENOMEM;
306
307 *e = (sd_event) {
308 .n_ref = 1,
309 .epoll_fd = -1,
310 .watchdog_fd = -1,
311 .realtime.wakeup = WAKEUP_CLOCK_DATA,
312 .realtime.fd = -1,
313 .realtime.next = USEC_INFINITY,
314 .boottime.wakeup = WAKEUP_CLOCK_DATA,
315 .boottime.fd = -1,
316 .boottime.next = USEC_INFINITY,
317 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
318 .monotonic.fd = -1,
319 .monotonic.next = USEC_INFINITY,
320 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
321 .realtime_alarm.fd = -1,
322 .realtime_alarm.next = USEC_INFINITY,
323 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
324 .boottime_alarm.fd = -1,
325 .boottime_alarm.next = USEC_INFINITY,
326 .perturb = USEC_INFINITY,
327 .original_pid = getpid_cached(),
328 };
329
330 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
331 if (r < 0)
332 goto fail;
333
334 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
335 if (e->epoll_fd < 0) {
336 r = -errno;
337 goto fail;
338 }
339
340 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
341
342 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
343 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
344 e->profile_delays = true;
345 }
346
347 *ret = e;
348 return 0;
349
350 fail:
351 event_free(e);
352 return r;
353 }
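/* An illustrative standalone usage sketch for the constructor above (not part of the library source,
 * error handling abbreviated, assuming the installed <systemd/sd-event.h> header): allocate an event
 * loop, attach a one-shot defer source that immediately asks it to exit, and run it.
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int quit(sd_event_source *s, void *userdata) {
 *             // ask the loop to finish with exit code 0
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             if (sd_event_new(&e) < 0)
 *                     return 1;
 *
 *             (void) sd_event_add_defer(e, NULL, quit, NULL);   // floating source, dispatched once
 *             (void) sd_event_loop(e);                          // runs until sd_event_exit() is called
 *
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */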
354
355 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
356
357 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
358 if (s)
359 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
360 return sd_event_source_unref(s);
361 }
362
363 static bool event_pid_changed(sd_event *e) {
364 assert(e);
365
366 /* We don't support people creating an event loop and keeping
367 * it around over a fork(). Let's complain. */
368
369 return e->original_pid != getpid_cached();
370 }
371
372 static void source_io_unregister(sd_event_source *s) {
373 assert(s);
374 assert(s->type == SOURCE_IO);
375
376 if (event_pid_changed(s->event))
377 return;
378
379 if (!s->io.registered)
380 return;
381
382 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
383 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
384 strna(s->description), event_source_type_to_string(s->type));
385
386 s->io.registered = false;
387 }
388
389 static int source_io_register(
390 sd_event_source *s,
391 int enabled,
392 uint32_t events) {
393
394 struct epoll_event ev;
395 int r;
396
397 assert(s);
398 assert(s->type == SOURCE_IO);
399 assert(enabled != SD_EVENT_OFF);
400
401 ev = (struct epoll_event) {
402 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
403 .data.ptr = s,
404 };
405
406 if (s->io.registered)
407 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
408 else
409 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
410 if (r < 0)
411 return -errno;
412
413 s->io.registered = true;
414
415 return 0;
416 }
417
418 static void source_child_pidfd_unregister(sd_event_source *s) {
419 assert(s);
420 assert(s->type == SOURCE_CHILD);
421
422 if (event_pid_changed(s->event))
423 return;
424
425 if (!s->child.registered)
426 return;
427
428 if (EVENT_SOURCE_WATCH_PIDFD(s))
429 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
430 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
431 strna(s->description), event_source_type_to_string(s->type));
432
433 s->child.registered = false;
434 }
435
436 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
437 int r;
438
439 assert(s);
440 assert(s->type == SOURCE_CHILD);
441 assert(enabled != SD_EVENT_OFF);
442
443 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
444 struct epoll_event ev;
445
446 ev = (struct epoll_event) {
447 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
448 .data.ptr = s,
449 };
450
451 if (s->child.registered)
452 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
453 else
454 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
455 if (r < 0)
456 return -errno;
457 }
458
459 s->child.registered = true;
460 return 0;
461 }
462
463 static clockid_t event_source_type_to_clock(EventSourceType t) {
464
465 switch (t) {
466
467 case SOURCE_TIME_REALTIME:
468 return CLOCK_REALTIME;
469
470 case SOURCE_TIME_BOOTTIME:
471 return CLOCK_BOOTTIME;
472
473 case SOURCE_TIME_MONOTONIC:
474 return CLOCK_MONOTONIC;
475
476 case SOURCE_TIME_REALTIME_ALARM:
477 return CLOCK_REALTIME_ALARM;
478
479 case SOURCE_TIME_BOOTTIME_ALARM:
480 return CLOCK_BOOTTIME_ALARM;
481
482 default:
483 return (clockid_t) -1;
484 }
485 }
486
487 static EventSourceType clock_to_event_source_type(clockid_t clock) {
488
489 switch (clock) {
490
491 case CLOCK_REALTIME:
492 return SOURCE_TIME_REALTIME;
493
494 case CLOCK_BOOTTIME:
495 return SOURCE_TIME_BOOTTIME;
496
497 case CLOCK_MONOTONIC:
498 return SOURCE_TIME_MONOTONIC;
499
500 case CLOCK_REALTIME_ALARM:
501 return SOURCE_TIME_REALTIME_ALARM;
502
503 case CLOCK_BOOTTIME_ALARM:
504 return SOURCE_TIME_BOOTTIME_ALARM;
505
506 default:
507 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
508 }
509 }
510
511 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
512 assert(e);
513
514 switch (t) {
515
516 case SOURCE_TIME_REALTIME:
517 return &e->realtime;
518
519 case SOURCE_TIME_BOOTTIME:
520 return &e->boottime;
521
522 case SOURCE_TIME_MONOTONIC:
523 return &e->monotonic;
524
525 case SOURCE_TIME_REALTIME_ALARM:
526 return &e->realtime_alarm;
527
528 case SOURCE_TIME_BOOTTIME_ALARM:
529 return &e->boottime_alarm;
530
531 default:
532 return NULL;
533 }
534 }
535
536 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
537 assert(e);
538
539 if (!d)
540 return;
541
542 hashmap_remove(e->signal_data, &d->priority);
543 safe_close(d->fd);
544 free(d);
545 }
546
547 static int event_make_signal_data(
548 sd_event *e,
549 int sig,
550 struct signal_data **ret) {
551
552 struct epoll_event ev;
553 struct signal_data *d;
554 bool added = false;
555 sigset_t ss_copy;
556 int64_t priority;
557 int r;
558
559 assert(e);
560
561 if (event_pid_changed(e))
562 return -ECHILD;
563
564 if (e->signal_sources && e->signal_sources[sig])
565 priority = e->signal_sources[sig]->priority;
566 else
567 priority = SD_EVENT_PRIORITY_NORMAL;
568
569 d = hashmap_get(e->signal_data, &priority);
570 if (d) {
571 if (sigismember(&d->sigset, sig) > 0) {
572 if (ret)
573 *ret = d;
574 return 0;
575 }
576 } else {
577 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
578 if (r < 0)
579 return r;
580
581 d = new(struct signal_data, 1);
582 if (!d)
583 return -ENOMEM;
584
585 *d = (struct signal_data) {
586 .wakeup = WAKEUP_SIGNAL_DATA,
587 .fd = -1,
588 .priority = priority,
589 };
590
591 r = hashmap_put(e->signal_data, &d->priority, d);
592 if (r < 0) {
593 free(d);
594 return r;
595 }
596
597 added = true;
598 }
599
600 ss_copy = d->sigset;
601 assert_se(sigaddset(&ss_copy, sig) >= 0);
602
603 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
604 if (r < 0) {
605 r = -errno;
606 goto fail;
607 }
608
609 d->sigset = ss_copy;
610
611 if (d->fd >= 0) {
612 if (ret)
613 *ret = d;
614 return 0;
615 }
616
617 d->fd = fd_move_above_stdio(r);
618
619 ev = (struct epoll_event) {
620 .events = EPOLLIN,
621 .data.ptr = d,
622 };
623
624 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
625 if (r < 0) {
626 r = -errno;
627 goto fail;
628 }
629
630 if (ret)
631 *ret = d;
632
633 return 0;
634
635 fail:
636 if (added)
637 event_free_signal_data(e, d);
638
639 return r;
640 }
641
642 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
643 assert(e);
644 assert(d);
645
646 /* Turns off the specified signal in the signal data
647 * object. If the signal mask of the object becomes empty
648 * that way, the object is removed as well. */
649
650 if (sigismember(&d->sigset, sig) == 0)
651 return;
652
653 assert_se(sigdelset(&d->sigset, sig) >= 0);
654
655 if (sigisemptyset(&d->sigset)) {
656 /* If the mask is all-zero we can get rid of the structure */
657 event_free_signal_data(e, d);
658 return;
659 }
660
661 assert(d->fd >= 0);
662
663 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
664 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
665 }
666
667 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
668 struct signal_data *d;
669 static const int64_t zero_priority = 0;
670
671 assert(e);
672
673 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
674 * and possibly drop the signalfd for it. */
675
676 if (sig == SIGCHLD &&
677 e->n_enabled_child_sources > 0)
678 return;
679
680 if (e->signal_sources &&
681 e->signal_sources[sig] &&
682 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
683 return;
684
685 /*
686 * The specified signal might be enabled in three different queues:
687 *
688 * 1) the one that belongs to the priority passed (if it is non-NULL)
689 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
690 * 3) the 0 priority (to cover the SIGCHLD case)
691 *
692 * Hence, let's remove it from all three here.
693 */
694
695 if (priority) {
696 d = hashmap_get(e->signal_data, priority);
697 if (d)
698 event_unmask_signal_data(e, d, sig);
699 }
700
701 if (e->signal_sources && e->signal_sources[sig]) {
702 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
703 if (d)
704 event_unmask_signal_data(e, d, sig);
705 }
706
707 d = hashmap_get(e->signal_data, &zero_priority);
708 if (d)
709 event_unmask_signal_data(e, d, sig);
710 }
711
712 static void source_disconnect(sd_event_source *s) {
713 sd_event *event;
714
715 assert(s);
716
717 if (!s->event)
718 return;
719
720 assert(s->event->n_sources > 0);
721
722 switch (s->type) {
723
724 case SOURCE_IO:
725 if (s->io.fd >= 0)
726 source_io_unregister(s);
727
728 break;
729
730 case SOURCE_TIME_REALTIME:
731 case SOURCE_TIME_BOOTTIME:
732 case SOURCE_TIME_MONOTONIC:
733 case SOURCE_TIME_REALTIME_ALARM:
734 case SOURCE_TIME_BOOTTIME_ALARM: {
735 struct clock_data *d;
736
737 d = event_get_clock_data(s->event, s->type);
738 assert(d);
739
740 prioq_remove(d->earliest, s, &s->time.earliest_index);
741 prioq_remove(d->latest, s, &s->time.latest_index);
742 d->needs_rearm = true;
743 break;
744 }
745
746 case SOURCE_SIGNAL:
747 if (s->signal.sig > 0) {
748
749 if (s->event->signal_sources)
750 s->event->signal_sources[s->signal.sig] = NULL;
751
752 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
753 }
754
755 break;
756
757 case SOURCE_CHILD:
758 if (s->child.pid > 0) {
759 if (s->enabled != SD_EVENT_OFF) {
760 assert(s->event->n_enabled_child_sources > 0);
761 s->event->n_enabled_child_sources--;
762 }
763
764 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
765 }
766
767 if (EVENT_SOURCE_WATCH_PIDFD(s))
768 source_child_pidfd_unregister(s);
769 else
770 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
771
772 break;
773
774 case SOURCE_DEFER:
775 /* nothing */
776 break;
777
778 case SOURCE_POST:
779 set_remove(s->event->post_sources, s);
780 break;
781
782 case SOURCE_EXIT:
783 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
784 break;
785
786 case SOURCE_INOTIFY: {
787 struct inode_data *inode_data;
788
789 inode_data = s->inotify.inode_data;
790 if (inode_data) {
791 struct inotify_data *inotify_data;
792 assert_se(inotify_data = inode_data->inotify_data);
793
794 /* Detach this event source from the inode object */
795 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
796 s->inotify.inode_data = NULL;
797
798 if (s->pending) {
799 assert(inotify_data->n_pending > 0);
800 inotify_data->n_pending--;
801 }
802
803 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
804 * continues to be watched. That's because inotify doesn't really have an API for that: we
805 * can only change watch masks with access to the original inode either by fd or by path. But
806 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
807 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
808 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
809 * there), but given the need for open_by_handle_at() which is privileged and not universally
810 * available this would be quite an incomplete solution. Hence we go the other way, leave the
811 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
812 * anymore after reception. Yes, this sucks, but … Linux … */
813
814 /* Maybe release the inode data (and its inotify) */
815 event_gc_inode_data(s->event, inode_data);
816 }
817
818 break;
819 }
820
821 default:
822 assert_not_reached("Wut? I shouldn't exist.");
823 }
824
825 if (s->pending)
826 prioq_remove(s->event->pending, s, &s->pending_index);
827
828 if (s->prepare)
829 prioq_remove(s->event->prepare, s, &s->prepare_index);
830
831 event = s->event;
832
833 s->event = NULL;
834 LIST_REMOVE(sources, event->sources, s);
835 event->n_sources--;
836
837 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
838 * pidfd associated with this event source, which we'll do only on source_free(). */
839
840 if (!s->floating)
841 sd_event_unref(event);
842 }
843
844 static void source_free(sd_event_source *s) {
845 assert(s);
846
847 source_disconnect(s);
848
849 if (s->type == SOURCE_IO && s->io.owned)
850 s->io.fd = safe_close(s->io.fd);
851
852 if (s->type == SOURCE_CHILD) {
853 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
854
855 if (s->child.process_owned) {
856
857 if (!s->child.exited) {
858 bool sent = false;
859
860 if (s->child.pidfd >= 0) {
861 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
862 if (errno == ESRCH) /* Already dead */
863 sent = true;
864 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
865 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
866 s->child.pid);
867 } else
868 sent = true;
869 }
870
871 if (!sent)
872 if (kill(s->child.pid, SIGKILL) < 0)
873 if (errno != ESRCH) /* Already dead */
874 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
875 s->child.pid);
876 }
877
878 if (!s->child.waited) {
879 siginfo_t si = {};
880
881 /* Reap the child if we can */
882 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
883 }
884 }
885
886 if (s->child.pidfd_owned)
887 s->child.pidfd = safe_close(s->child.pidfd);
888 }
889
890 if (s->destroy_callback)
891 s->destroy_callback(s->userdata);
892
893 free(s->description);
894 free(s);
895 }
896 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
897
898 static int source_set_pending(sd_event_source *s, bool b) {
899 int r;
900
901 assert(s);
902 assert(s->type != SOURCE_EXIT);
903
904 if (s->pending == b)
905 return 0;
906
907 s->pending = b;
908
909 if (b) {
910 s->pending_iteration = s->event->iteration;
911
912 r = prioq_put(s->event->pending, s, &s->pending_index);
913 if (r < 0) {
914 s->pending = false;
915 return r;
916 }
917 } else
918 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
919
920 if (EVENT_SOURCE_IS_TIME(s->type)) {
921 struct clock_data *d;
922
923 d = event_get_clock_data(s->event, s->type);
924 assert(d);
925
926 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
927 prioq_reshuffle(d->latest, s, &s->time.latest_index);
928 d->needs_rearm = true;
929 }
930
931 if (s->type == SOURCE_SIGNAL && !b) {
932 struct signal_data *d;
933
934 d = hashmap_get(s->event->signal_data, &s->priority);
935 if (d && d->current == s)
936 d->current = NULL;
937 }
938
939 if (s->type == SOURCE_INOTIFY) {
940
941 assert(s->inotify.inode_data);
942 assert(s->inotify.inode_data->inotify_data);
943
944 if (b)
945 s->inotify.inode_data->inotify_data->n_pending ++;
946 else {
947 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
948 s->inotify.inode_data->inotify_data->n_pending --;
949 }
950 }
951
952 return 0;
953 }
954
955 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
956 sd_event_source *s;
957
958 assert(e);
959
960 s = new(sd_event_source, 1);
961 if (!s)
962 return NULL;
963
964 *s = (struct sd_event_source) {
965 .n_ref = 1,
966 .event = e,
967 .floating = floating,
968 .type = type,
969 .pending_index = PRIOQ_IDX_NULL,
970 .prepare_index = PRIOQ_IDX_NULL,
971 };
972
973 if (!floating)
974 sd_event_ref(e);
975
976 LIST_PREPEND(sources, e->sources, s);
977 e->n_sources++;
978
979 return s;
980 }
981
982 _public_ int sd_event_add_io(
983 sd_event *e,
984 sd_event_source **ret,
985 int fd,
986 uint32_t events,
987 sd_event_io_handler_t callback,
988 void *userdata) {
989
990 _cleanup_(source_freep) sd_event_source *s = NULL;
991 int r;
992
993 assert_return(e, -EINVAL);
994 assert_return(e = event_resolve(e), -ENOPKG);
995 assert_return(fd >= 0, -EBADF);
996 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
997 assert_return(callback, -EINVAL);
998 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
999 assert_return(!event_pid_changed(e), -ECHILD);
1000
1001 s = source_new(e, !ret, SOURCE_IO);
1002 if (!s)
1003 return -ENOMEM;
1004
1005 s->wakeup = WAKEUP_EVENT_SOURCE;
1006 s->io.fd = fd;
1007 s->io.events = events;
1008 s->io.callback = callback;
1009 s->userdata = userdata;
1010 s->enabled = SD_EVENT_ON;
1011
1012 r = source_io_register(s, s->enabled, events);
1013 if (r < 0)
1014 return r;
1015
1016 if (ret)
1017 *ret = s;
1018 TAKE_PTR(s);
1019
1020 return 0;
1021 }
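/* An illustrative standalone usage sketch for sd_event_add_io() (not part of the library source, error
 * handling abbreviated): watch STDIN_FILENO for readability and echo whatever arrives. The fd stays
 * owned by the caller, which matches the default (see sd_event_source_set_io_fd_own() further down).
 *
 *     #include <sys/epoll.h>
 *     #include <systemd/sd-event.h>
 *     #include <unistd.h>
 *
 *     static int on_stdin(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             if (n <= 0)   // EOF or error: stop the loop
 *                     return sd_event_exit(sd_event_source_get_event(s), 0);
 *             (void) write(STDOUT_FILENO, buf, (size_t) n);
 *             return 0;
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             sd_event_new(&e);
 *             sd_event_add_io(e, NULL, STDIN_FILENO, EPOLLIN, on_stdin, NULL);
 *             sd_event_loop(e);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */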
1022
1023 static void initialize_perturb(sd_event *e) {
1024 sd_id128_t bootid = {};
1025
1026 /* When we sleep for longer, we try to realign the wakeup to
1027 the same time within each minute/second/250ms, so that
1028 events all across the system can be coalesced into a single
1029 CPU wakeup. However, let's take some system-specific
1030 randomness for this value, so that in a network of systems
1031 with synced clocks timer events are distributed a
1032 bit. Here, we calculate a perturbation usec offset from the
1033 boot ID. */
1034
1035 if (_likely_(e->perturb != USEC_INFINITY))
1036 return;
1037
1038 if (sd_id128_get_boot(&bootid) >= 0)
1039 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1040 }
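/* An illustrative standalone sketch of the perturbation arithmetic above (not part of the library
 * source). The two 64-bit halves of the boot ID shown here are hypothetical; USEC_PER_MINUTE is
 * 60 * 1000 * 1000. Every machine thus picks a fixed, boot-specific offset in [0, 60s) and aligns its
 * timer wakeups relative to it, so wakeups coalesce locally but differ across machines.
 *
 *     #include <inttypes.h>
 *     #include <stdio.h>
 *
 *     int main(void) {
 *             uint64_t q0 = UINT64_C(0x0123456789abcdef), q1 = UINT64_C(0xfedcba9876543210);
 *             uint64_t perturb = (q0 ^ q1) % UINT64_C(60000000);   // offset within the minute, in us
 *
 *             printf("perturb = %" PRIu64 " us\n", perturb);
 *             return 0;
 *     }
 */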
1041
1042 static int event_setup_timer_fd(
1043 sd_event *e,
1044 struct clock_data *d,
1045 clockid_t clock) {
1046
1047 struct epoll_event ev;
1048 int r, fd;
1049
1050 assert(e);
1051 assert(d);
1052
1053 if (_likely_(d->fd >= 0))
1054 return 0;
1055
1056 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1057 if (fd < 0)
1058 return -errno;
1059
1060 fd = fd_move_above_stdio(fd);
1061
1062 ev = (struct epoll_event) {
1063 .events = EPOLLIN,
1064 .data.ptr = d,
1065 };
1066
1067 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1068 if (r < 0) {
1069 safe_close(fd);
1070 return -errno;
1071 }
1072
1073 d->fd = fd;
1074 return 0;
1075 }
1076
1077 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1078 assert(s);
1079
1080 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1081 }
1082
1083 _public_ int sd_event_add_time(
1084 sd_event *e,
1085 sd_event_source **ret,
1086 clockid_t clock,
1087 uint64_t usec,
1088 uint64_t accuracy,
1089 sd_event_time_handler_t callback,
1090 void *userdata) {
1091
1092 EventSourceType type;
1093 _cleanup_(source_freep) sd_event_source *s = NULL;
1094 struct clock_data *d;
1095 int r;
1096
1097 assert_return(e, -EINVAL);
1098 assert_return(e = event_resolve(e), -ENOPKG);
1099 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1100 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1101 assert_return(!event_pid_changed(e), -ECHILD);
1102
1103 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1104 return -EOPNOTSUPP;
1105
1106 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1107 if (type < 0)
1108 return -EOPNOTSUPP;
1109
1110 if (!callback)
1111 callback = time_exit_callback;
1112
1113 d = event_get_clock_data(e, type);
1114 assert(d);
1115
1116 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1117 if (r < 0)
1118 return r;
1119
1120 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1121 if (r < 0)
1122 return r;
1123
1124 if (d->fd < 0) {
1125 r = event_setup_timer_fd(e, d, clock);
1126 if (r < 0)
1127 return r;
1128 }
1129
1130 s = source_new(e, !ret, type);
1131 if (!s)
1132 return -ENOMEM;
1133
1134 s->time.next = usec;
1135 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1136 s->time.callback = callback;
1137 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1138 s->userdata = userdata;
1139 s->enabled = SD_EVENT_ONESHOT;
1140
1141 d->needs_rearm = true;
1142
1143 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1144 if (r < 0)
1145 return r;
1146
1147 r = prioq_put(d->latest, s, &s->time.latest_index);
1148 if (r < 0)
1149 return r;
1150
1151 if (ret)
1152 *ret = s;
1153 TAKE_PTR(s);
1154
1155 return 0;
1156 }
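/* An illustrative standalone usage sketch for sd_event_add_time() (not part of the library source,
 * error handling abbreviated): fire once, five seconds from now on CLOCK_MONOTONIC, with the default
 * accuracy (passing 0 selects DEFAULT_ACCURACY_USEC, see above). Times are absolute, hence the
 * sd_event_now() + offset pattern.
 *
 *     #include <systemd/sd-event.h>
 *     #include <time.h>
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             // usec is the absolute time the timer was armed for
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             uint64_t now;
 *
 *             sd_event_new(&e);
 *             sd_event_now(e, CLOCK_MONOTONIC, &now);   // current time as the loop sees it
 *             sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now + 5 * 1000000ULL, 0, on_timer, NULL);
 *             sd_event_loop(e);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */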
1157
1158 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1159 assert(s);
1160
1161 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1162 }
1163
1164 _public_ int sd_event_add_signal(
1165 sd_event *e,
1166 sd_event_source **ret,
1167 int sig,
1168 sd_event_signal_handler_t callback,
1169 void *userdata) {
1170
1171 _cleanup_(source_freep) sd_event_source *s = NULL;
1172 struct signal_data *d;
1173 int r;
1174
1175 assert_return(e, -EINVAL);
1176 assert_return(e = event_resolve(e), -ENOPKG);
1177 assert_return(SIGNAL_VALID(sig), -EINVAL);
1178 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1179 assert_return(!event_pid_changed(e), -ECHILD);
1180
1181 if (!callback)
1182 callback = signal_exit_callback;
1183
1184 r = signal_is_blocked(sig);
1185 if (r < 0)
1186 return r;
1187 if (r == 0)
1188 return -EBUSY;
1189
1190 if (!e->signal_sources) {
1191 e->signal_sources = new0(sd_event_source*, _NSIG);
1192 if (!e->signal_sources)
1193 return -ENOMEM;
1194 } else if (e->signal_sources[sig])
1195 return -EBUSY;
1196
1197 s = source_new(e, !ret, SOURCE_SIGNAL);
1198 if (!s)
1199 return -ENOMEM;
1200
1201 s->signal.sig = sig;
1202 s->signal.callback = callback;
1203 s->userdata = userdata;
1204 s->enabled = SD_EVENT_ON;
1205
1206 e->signal_sources[sig] = s;
1207
1208 r = event_make_signal_data(e, sig, &d);
1209 if (r < 0)
1210 return r;
1211
1212 /* Use the signal name as description for the event source by default */
1213 (void) sd_event_source_set_description(s, signal_to_string(sig));
1214
1215 if (ret)
1216 *ret = s;
1217 TAKE_PTR(s);
1218
1219 return 0;
1220 }
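/* An illustrative standalone usage sketch for sd_event_add_signal() (not part of the library source,
 * error handling abbreviated). The signal must already be blocked in all threads, otherwise the call
 * fails with -EBUSY (see the signal_is_blocked() check above).
 *
 *     #include <signal.h>
 *     #include <stdio.h>
 *     #include <sys/signalfd.h>
 *     #include <systemd/sd-event.h>
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             fprintf(stderr, "Got SIGTERM from pid %u, exiting.\n", (unsigned) si->ssi_pid);
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             sigset_t ss;
 *
 *             sigemptyset(&ss);
 *             sigaddset(&ss, SIGTERM);
 *             sigprocmask(SIG_BLOCK, &ss, NULL);   // must happen before sd_event_add_signal()
 *
 *             sd_event_new(&e);
 *             sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *             sd_event_loop(e);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */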
1221
1222 static bool shall_use_pidfd(void) {
1223 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1224 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1225 }
1226
1227 _public_ int sd_event_add_child(
1228 sd_event *e,
1229 sd_event_source **ret,
1230 pid_t pid,
1231 int options,
1232 sd_event_child_handler_t callback,
1233 void *userdata) {
1234
1235 _cleanup_(source_freep) sd_event_source *s = NULL;
1236 int r;
1237
1238 assert_return(e, -EINVAL);
1239 assert_return(e = event_resolve(e), -ENOPKG);
1240 assert_return(pid > 1, -EINVAL);
1241 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1242 assert_return(options != 0, -EINVAL);
1243 assert_return(callback, -EINVAL);
1244 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1245 assert_return(!event_pid_changed(e), -ECHILD);
1246
1247 if (e->n_enabled_child_sources == 0) {
1248 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1249 * for compatibility with pre-pidfd and because we reap the child processes
1250 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1251 * take effect.
1252 *
1253 * (As an optimization we only do this check on the first child event source created.) */
1254 r = signal_is_blocked(SIGCHLD);
1255 if (r < 0)
1256 return r;
1257 if (r == 0)
1258 return -EBUSY;
1259 }
1260
1261 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1262 if (r < 0)
1263 return r;
1264
1265 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1266 return -EBUSY;
1267
1268 s = source_new(e, !ret, SOURCE_CHILD);
1269 if (!s)
1270 return -ENOMEM;
1271
1272 s->wakeup = WAKEUP_EVENT_SOURCE;
1273 s->child.pid = pid;
1274 s->child.options = options;
1275 s->child.callback = callback;
1276 s->userdata = userdata;
1277 s->enabled = SD_EVENT_ONESHOT;
1278
1279 /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
1280 * pin the PID, and make regular waitid() handling race-free. */
1281
1282 if (shall_use_pidfd()) {
1283 s->child.pidfd = pidfd_open(s->child.pid, 0);
1284 if (s->child.pidfd < 0) {
1285 /* Propagate errors unless the syscall is not supported or blocked */
1286 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1287 return -errno;
1288 } else
1289 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1290 } else
1291 s->child.pidfd = -1;
1292
1293 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1294 if (r < 0)
1295 return r;
1296
1297 e->n_enabled_child_sources++;
1298
1299 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1300 /* We have a pidfd and we only want to watch for exit */
1301
1302 r = source_child_pidfd_register(s, s->enabled);
1303 if (r < 0) {
1304 e->n_enabled_child_sources--;
1305 return r;
1306 }
1307 } else {
1308 /* We have no pidfd or we shall wait for some other event than WEXITED */
1309
1310 r = event_make_signal_data(e, SIGCHLD, NULL);
1311 if (r < 0) {
1312 e->n_enabled_child_sources--;
1313 return r;
1314 }
1315
1316 e->need_process_child = true;
1317 }
1318
1319 if (ret)
1320 *ret = s;
1321
1322 TAKE_PTR(s);
1323 return 0;
1324 }
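/* An illustrative standalone usage sketch for sd_event_add_child() (not part of the library source,
 * error handling abbreviated): fork a child and get a callback when it exits. SIGCHLD must be blocked
 * first (see the check at the top of the function), and the event loop performs the waitid() itself.
 *
 *     #include <signal.h>
 *     #include <sys/wait.h>
 *     #include <systemd/sd-event.h>
 *     #include <unistd.h>
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_code is CLD_EXITED etc., si->si_status the exit status or signal
 *             return sd_event_exit(sd_event_source_get_event(s), si->si_status);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             sigset_t ss;
 *             pid_t pid;
 *
 *             sigemptyset(&ss);
 *             sigaddset(&ss, SIGCHLD);
 *             sigprocmask(SIG_BLOCK, &ss, NULL);   // required even when pidfds are available
 *
 *             pid = fork();
 *             if (pid == 0)
 *                     _exit(7);                    // child: exit immediately
 *
 *             sd_event_new(&e);
 *             sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 *             sd_event_loop(e);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */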
1325
1326 _public_ int sd_event_add_child_pidfd(
1327 sd_event *e,
1328 sd_event_source **ret,
1329 int pidfd,
1330 int options,
1331 sd_event_child_handler_t callback,
1332 void *userdata) {
1333
1334
1335 _cleanup_(source_freep) sd_event_source *s = NULL;
1336 pid_t pid;
1337 int r;
1338
1339 assert_return(e, -EINVAL);
1340 assert_return(e = event_resolve(e), -ENOPKG);
1341 assert_return(pidfd >= 0, -EBADF);
1342 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1343 assert_return(options != 0, -EINVAL);
1344 assert_return(callback, -EINVAL);
1345 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1346 assert_return(!event_pid_changed(e), -ECHILD);
1347
1348 if (e->n_enabled_child_sources == 0) {
1349 r = signal_is_blocked(SIGCHLD);
1350 if (r < 0)
1351 return r;
1352 if (r == 0)
1353 return -EBUSY;
1354 }
1355
1356 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1357 if (r < 0)
1358 return r;
1359
1360 r = pidfd_get_pid(pidfd, &pid);
1361 if (r < 0)
1362 return r;
1363
1364 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1365 return -EBUSY;
1366
1367 s = source_new(e, !ret, SOURCE_CHILD);
1368 if (!s)
1369 return -ENOMEM;
1370
1371 s->wakeup = WAKEUP_EVENT_SOURCE;
1372 s->child.pidfd = pidfd;
1373 s->child.pid = pid;
1374 s->child.options = options;
1375 s->child.callback = callback;
1376 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1377 s->userdata = userdata;
1378 s->enabled = SD_EVENT_ONESHOT;
1379
1380 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1381 if (r < 0)
1382 return r;
1383
1384 e->n_enabled_child_sources++;
1385
1386 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1387 /* We only want to watch for WEXITED */
1388
1389 r = source_child_pidfd_register(s, s->enabled);
1390 if (r < 0) {
1391 e->n_enabled_child_sources--;
1392 return r;
1393 }
1394 } else {
1395 /* We shall wait for some other event than WEXITED */
1396
1397 r = event_make_signal_data(e, SIGCHLD, NULL);
1398 if (r < 0) {
1399 e->n_enabled_child_sources--;
1400 return r;
1401 }
1402
1403 e->need_process_child = true;
1404 }
1405
1406 if (ret)
1407 *ret = s;
1408
1409 TAKE_PTR(s);
1410 return 0;
1411 }
1412
1413 _public_ int sd_event_add_defer(
1414 sd_event *e,
1415 sd_event_source **ret,
1416 sd_event_handler_t callback,
1417 void *userdata) {
1418
1419 _cleanup_(source_freep) sd_event_source *s = NULL;
1420 int r;
1421
1422 assert_return(e, -EINVAL);
1423 assert_return(e = event_resolve(e), -ENOPKG);
1424 assert_return(callback, -EINVAL);
1425 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1426 assert_return(!event_pid_changed(e), -ECHILD);
1427
1428 s = source_new(e, !ret, SOURCE_DEFER);
1429 if (!s)
1430 return -ENOMEM;
1431
1432 s->defer.callback = callback;
1433 s->userdata = userdata;
1434 s->enabled = SD_EVENT_ONESHOT;
1435
1436 r = source_set_pending(s, true);
1437 if (r < 0)
1438 return r;
1439
1440 if (ret)
1441 *ret = s;
1442 TAKE_PTR(s);
1443
1444 return 0;
1445 }
1446
1447 _public_ int sd_event_add_post(
1448 sd_event *e,
1449 sd_event_source **ret,
1450 sd_event_handler_t callback,
1451 void *userdata) {
1452
1453 _cleanup_(source_freep) sd_event_source *s = NULL;
1454 int r;
1455
1456 assert_return(e, -EINVAL);
1457 assert_return(e = event_resolve(e), -ENOPKG);
1458 assert_return(callback, -EINVAL);
1459 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1460 assert_return(!event_pid_changed(e), -ECHILD);
1461
1462 r = set_ensure_allocated(&e->post_sources, NULL);
1463 if (r < 0)
1464 return r;
1465
1466 s = source_new(e, !ret, SOURCE_POST);
1467 if (!s)
1468 return -ENOMEM;
1469
1470 s->post.callback = callback;
1471 s->userdata = userdata;
1472 s->enabled = SD_EVENT_ON;
1473
1474 r = set_put(e->post_sources, s);
1475 if (r < 0)
1476 return r;
1477
1478 if (ret)
1479 *ret = s;
1480 TAKE_PTR(s);
1481
1482 return 0;
1483 }
1484
1485 _public_ int sd_event_add_exit(
1486 sd_event *e,
1487 sd_event_source **ret,
1488 sd_event_handler_t callback,
1489 void *userdata) {
1490
1491 _cleanup_(source_freep) sd_event_source *s = NULL;
1492 int r;
1493
1494 assert_return(e, -EINVAL);
1495 assert_return(e = event_resolve(e), -ENOPKG);
1496 assert_return(callback, -EINVAL);
1497 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1498 assert_return(!event_pid_changed(e), -ECHILD);
1499
1500 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1501 if (r < 0)
1502 return r;
1503
1504 s = source_new(e, !ret, SOURCE_EXIT);
1505 if (!s)
1506 return -ENOMEM;
1507
1508 s->exit.callback = callback;
1509 s->userdata = userdata;
1510 s->exit.prioq_index = PRIOQ_IDX_NULL;
1511 s->enabled = SD_EVENT_ONESHOT;
1512
1513 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1514 if (r < 0)
1515 return r;
1516
1517 if (ret)
1518 *ret = s;
1519 TAKE_PTR(s);
1520
1521 return 0;
1522 }
1523
1524 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1525 assert(e);
1526
1527 if (!d)
1528 return;
1529
1530 assert(hashmap_isempty(d->inodes));
1531 assert(hashmap_isempty(d->wd));
1532
1533 if (d->buffer_filled > 0)
1534 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1535
1536 hashmap_free(d->inodes);
1537 hashmap_free(d->wd);
1538
1539 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1540
1541 if (d->fd >= 0) {
1542 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1543 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1544
1545 safe_close(d->fd);
1546 }
1547 free(d);
1548 }
1549
1550 static int event_make_inotify_data(
1551 sd_event *e,
1552 int64_t priority,
1553 struct inotify_data **ret) {
1554
1555 _cleanup_close_ int fd = -1;
1556 struct inotify_data *d;
1557 struct epoll_event ev;
1558 int r;
1559
1560 assert(e);
1561
1562 d = hashmap_get(e->inotify_data, &priority);
1563 if (d) {
1564 if (ret)
1565 *ret = d;
1566 return 0;
1567 }
1568
1569 fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
1570 if (fd < 0)
1571 return -errno;
1572
1573 fd = fd_move_above_stdio(fd);
1574
1575 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1576 if (r < 0)
1577 return r;
1578
1579 d = new(struct inotify_data, 1);
1580 if (!d)
1581 return -ENOMEM;
1582
1583 *d = (struct inotify_data) {
1584 .wakeup = WAKEUP_INOTIFY_DATA,
1585 .fd = TAKE_FD(fd),
1586 .priority = priority,
1587 };
1588
1589 r = hashmap_put(e->inotify_data, &d->priority, d);
1590 if (r < 0) {
1591 d->fd = safe_close(d->fd);
1592 free(d);
1593 return r;
1594 }
1595
1596 ev = (struct epoll_event) {
1597 .events = EPOLLIN,
1598 .data.ptr = d,
1599 };
1600
1601 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1602 r = -errno;
1603 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1604 * remove the fd from the epoll first, which we don't want as we couldn't
1605 * add it in the first place. */
1606 event_free_inotify_data(e, d);
1607 return r;
1608 }
1609
1610 if (ret)
1611 *ret = d;
1612
1613 return 1;
1614 }
1615
1616 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1617 int r;
1618
1619 assert(x);
1620 assert(y);
1621
1622 r = CMP(x->dev, y->dev);
1623 if (r != 0)
1624 return r;
1625
1626 return CMP(x->ino, y->ino);
1627 }
1628
1629 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1630 assert(d);
1631
1632 siphash24_compress(&d->dev, sizeof(d->dev), state);
1633 siphash24_compress(&d->ino, sizeof(d->ino), state);
1634 }
1635
1636 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1637
1638 static void event_free_inode_data(
1639 sd_event *e,
1640 struct inode_data *d) {
1641
1642 assert(e);
1643
1644 if (!d)
1645 return;
1646
1647 assert(!d->event_sources);
1648
1649 if (d->fd >= 0) {
1650 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1651 safe_close(d->fd);
1652 }
1653
1654 if (d->inotify_data) {
1655
1656 if (d->wd >= 0) {
1657 if (d->inotify_data->fd >= 0) {
1658 /* So here's a problem. At the time this runs the watch descriptor might already be
1659 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1660 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1661 * likely case to happen. */
1662
1663 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1664 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1665 }
1666
1667 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1668 }
1669
1670 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1671 }
1672
1673 free(d);
1674 }
1675
1676 static void event_gc_inode_data(
1677 sd_event *e,
1678 struct inode_data *d) {
1679
1680 struct inotify_data *inotify_data;
1681
1682 assert(e);
1683
1684 if (!d)
1685 return;
1686
1687 if (d->event_sources)
1688 return;
1689
1690 inotify_data = d->inotify_data;
1691 event_free_inode_data(e, d);
1692
1693 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1694 event_free_inotify_data(e, inotify_data);
1695 }
1696
1697 static int event_make_inode_data(
1698 sd_event *e,
1699 struct inotify_data *inotify_data,
1700 dev_t dev,
1701 ino_t ino,
1702 struct inode_data **ret) {
1703
1704 struct inode_data *d, key;
1705 int r;
1706
1707 assert(e);
1708 assert(inotify_data);
1709
1710 key = (struct inode_data) {
1711 .ino = ino,
1712 .dev = dev,
1713 };
1714
1715 d = hashmap_get(inotify_data->inodes, &key);
1716 if (d) {
1717 if (ret)
1718 *ret = d;
1719
1720 return 0;
1721 }
1722
1723 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1724 if (r < 0)
1725 return r;
1726
1727 d = new(struct inode_data, 1);
1728 if (!d)
1729 return -ENOMEM;
1730
1731 *d = (struct inode_data) {
1732 .dev = dev,
1733 .ino = ino,
1734 .wd = -1,
1735 .fd = -1,
1736 .inotify_data = inotify_data,
1737 };
1738
1739 r = hashmap_put(inotify_data->inodes, d, d);
1740 if (r < 0) {
1741 free(d);
1742 return r;
1743 }
1744
1745 if (ret)
1746 *ret = d;
1747
1748 return 1;
1749 }
1750
1751 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1752 bool excl_unlink = true;
1753 uint32_t combined = 0;
1754 sd_event_source *s;
1755
1756 assert(d);
1757
1758 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1759 * the IN_EXCL_UNLINK flag is ANDed instead.
1760 *
1761 * Note that we add all sources to the mask here, regardless of whether enabled, disabled or oneshot. That's
1762 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1763 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1764 * events we don't care for client-side. */
1765
1766 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1767
1768 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1769 excl_unlink = false;
1770
1771 combined |= s->inotify.mask;
1772 }
1773
1774 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1775 }
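/* An illustrative standalone sketch of the combination rule implemented above (not part of the library
 * source). Two hypothetical sources watch the same inode, one with IN_CREATE|IN_EXCL_UNLINK and one with
 * IN_DELETE; because only one of them sets IN_EXCL_UNLINK, the combined mask keeps IN_CREATE and
 * IN_DELETE but drops IN_EXCL_UNLINK (it is ANDed, not ORed):
 *
 *     #include <stdint.h>
 *     #include <stdio.h>
 *     #include <sys/inotify.h>
 *
 *     int main(void) {
 *             uint32_t masks[] = { IN_CREATE | IN_EXCL_UNLINK, IN_DELETE };
 *             uint32_t combined = 0;
 *             int excl_unlink = 1;
 *
 *             for (size_t i = 0; i < sizeof(masks) / sizeof(masks[0]); i++) {
 *                     if (!(masks[i] & IN_EXCL_UNLINK))
 *                             excl_unlink = 0;
 *                     combined |= masks[i];
 *             }
 *
 *             combined = (combined & ~(IN_ONESHOT | IN_DONT_FOLLOW | IN_ONLYDIR | IN_EXCL_UNLINK)) |
 *                        (excl_unlink ? IN_EXCL_UNLINK : 0);
 *
 *             printf("combined mask: %#x\n", (unsigned) combined);
 *             return 0;
 *     }
 */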
1776
1777 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1778 uint32_t combined_mask;
1779 int wd, r;
1780
1781 assert(d);
1782 assert(d->fd >= 0);
1783
1784 combined_mask = inode_data_determine_mask(d);
1785
1786 if (d->wd >= 0 && combined_mask == d->combined_mask)
1787 return 0;
1788
1789 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1790 if (r < 0)
1791 return r;
1792
1793 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1794 if (wd < 0)
1795 return -errno;
1796
1797 if (d->wd < 0) {
1798 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1799 if (r < 0) {
1800 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1801 return r;
1802 }
1803
1804 d->wd = wd;
1805
1806 } else if (d->wd != wd) {
1807
1808 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1809 (void) inotify_rm_watch(d->fd, wd);
1810 return -EINVAL;
1811 }
1812
1813 d->combined_mask = combined_mask;
1814 return 1;
1815 }
1816
1817 _public_ int sd_event_add_inotify(
1818 sd_event *e,
1819 sd_event_source **ret,
1820 const char *path,
1821 uint32_t mask,
1822 sd_event_inotify_handler_t callback,
1823 void *userdata) {
1824
1825 struct inotify_data *inotify_data = NULL;
1826 struct inode_data *inode_data = NULL;
1827 _cleanup_close_ int fd = -1;
1828 _cleanup_(source_freep) sd_event_source *s = NULL;
1829 struct stat st;
1830 int r;
1831
1832 assert_return(e, -EINVAL);
1833 assert_return(e = event_resolve(e), -ENOPKG);
1834 assert_return(path, -EINVAL);
1835 assert_return(callback, -EINVAL);
1836 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1837 assert_return(!event_pid_changed(e), -ECHILD);
1838
1839 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1840 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1841 * the user can't use them for us. */
1842 if (mask & IN_MASK_ADD)
1843 return -EINVAL;
1844
1845 fd = open(path, O_PATH|O_CLOEXEC|
1846 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1847 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1848 if (fd < 0)
1849 return -errno;
1850
1851 if (fstat(fd, &st) < 0)
1852 return -errno;
1853
1854 s = source_new(e, !ret, SOURCE_INOTIFY);
1855 if (!s)
1856 return -ENOMEM;
1857
1858 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1859 s->inotify.mask = mask;
1860 s->inotify.callback = callback;
1861 s->userdata = userdata;
1862
1863 /* Allocate an inotify object for this priority, and an inode object within it */
1864 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1865 if (r < 0)
1866 return r;
1867
1868 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1869 if (r < 0) {
1870 event_free_inotify_data(e, inotify_data);
1871 return r;
1872 }
1873
1874 /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the
1875 * event source can still be changed until then; changing it requires access to the original inode. */
1876 if (inode_data->fd < 0) {
1877 inode_data->fd = TAKE_FD(fd);
1878 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1879 }
1880
1881 /* Link our event source to the inode data object */
1882 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1883 s->inotify.inode_data = inode_data;
1884
1885 /* Actually realize the watch now */
1886 r = inode_data_realize_watch(e, inode_data);
1887 if (r < 0)
1888 return r;
1889
1890 (void) sd_event_source_set_description(s, path);
1891
1892 if (ret)
1893 *ret = s;
1894 TAKE_PTR(s);
1895
1896 return 0;
1897 }
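/* An illustrative standalone usage sketch for sd_event_add_inotify() (not part of the library source,
 * error handling abbreviated): watch /tmp (a hypothetical path) for files being created or deleted.
 *
 *     #include <stdio.h>
 *     #include <sys/inotify.h>
 *     #include <systemd/sd-event.h>
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             printf("event mask %#x on %s\n", (unsigned) ev->mask,
 *                    ev->len > 0 ? ev->name : "(watched inode)");
 *             return 0;
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *
 *             sd_event_new(&e);
 *             sd_event_add_inotify(e, NULL, "/tmp", IN_CREATE | IN_DELETE, on_inotify, NULL);
 *             sd_event_loop(e);   // runs until interrupted
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */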
1898
1899 static sd_event_source* event_source_free(sd_event_source *s) {
1900 if (!s)
1901 return NULL;
1902
1903 /* Here's a special hack: when we are called from a
1904 * dispatch handler we won't free the event source
1905 * immediately, but we will detach the fd from the
1906 * epoll. This way it is safe for the caller to unref
1907 * the event source and immediately close the fd, but
1908 * we still retain a valid event source object after
1909 * the callback. */
1910
1911 if (s->dispatching) {
1912 if (s->type == SOURCE_IO)
1913 source_io_unregister(s);
1914
1915 source_disconnect(s);
1916 } else
1917 source_free(s);
1918
1919 return NULL;
1920 }
1921
1922 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1923
1924 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1925 assert_return(s, -EINVAL);
1926 assert_return(!event_pid_changed(s->event), -ECHILD);
1927
1928 return free_and_strdup(&s->description, description);
1929 }
1930
1931 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1932 assert_return(s, -EINVAL);
1933 assert_return(description, -EINVAL);
1934 assert_return(!event_pid_changed(s->event), -ECHILD);
1935
1936 if (!s->description)
1937 return -ENXIO;
1938
1939 *description = s->description;
1940 return 0;
1941 }
1942
1943 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1944 assert_return(s, NULL);
1945
1946 return s->event;
1947 }
1948
1949 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1950 assert_return(s, -EINVAL);
1951 assert_return(s->type != SOURCE_EXIT, -EDOM);
1952 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1953 assert_return(!event_pid_changed(s->event), -ECHILD);
1954
1955 return s->pending;
1956 }
1957
1958 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1959 assert_return(s, -EINVAL);
1960 assert_return(s->type == SOURCE_IO, -EDOM);
1961 assert_return(!event_pid_changed(s->event), -ECHILD);
1962
1963 return s->io.fd;
1964 }
1965
1966 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1967 int r;
1968
1969 assert_return(s, -EINVAL);
1970 assert_return(fd >= 0, -EBADF);
1971 assert_return(s->type == SOURCE_IO, -EDOM);
1972 assert_return(!event_pid_changed(s->event), -ECHILD);
1973
1974 if (s->io.fd == fd)
1975 return 0;
1976
1977 if (s->enabled == SD_EVENT_OFF) {
1978 s->io.fd = fd;
1979 s->io.registered = false;
1980 } else {
1981 int saved_fd;
1982
1983 saved_fd = s->io.fd;
1984 assert(s->io.registered);
1985
1986 s->io.fd = fd;
1987 s->io.registered = false;
1988
1989 r = source_io_register(s, s->enabled, s->io.events);
1990 if (r < 0) {
1991 s->io.fd = saved_fd;
1992 s->io.registered = true;
1993 return r;
1994 }
1995
1996 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1997 }
1998
1999 return 0;
2000 }
2001
2002 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2003 assert_return(s, -EINVAL);
2004 assert_return(s->type == SOURCE_IO, -EDOM);
2005
2006 return s->io.owned;
2007 }
2008
2009 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2010 assert_return(s, -EINVAL);
2011 assert_return(s->type == SOURCE_IO, -EDOM);
2012
2013 s->io.owned = own;
2014 return 0;
2015 }
2016
2017 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2018 assert_return(s, -EINVAL);
2019 assert_return(events, -EINVAL);
2020 assert_return(s->type == SOURCE_IO, -EDOM);
2021 assert_return(!event_pid_changed(s->event), -ECHILD);
2022
2023 *events = s->io.events;
2024 return 0;
2025 }
2026
2027 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2028 int r;
2029
2030 assert_return(s, -EINVAL);
2031 assert_return(s->type == SOURCE_IO, -EDOM);
2032 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2033 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2034 assert_return(!event_pid_changed(s->event), -ECHILD);
2035
2036 /* edge-triggered updates are never skipped, so we can reset edges */
2037 if (s->io.events == events && !(events & EPOLLET))
2038 return 0;
2039
2040 r = source_set_pending(s, false);
2041 if (r < 0)
2042 return r;
2043
2044 if (s->enabled != SD_EVENT_OFF) {
2045 r = source_io_register(s, s->enabled, events);
2046 if (r < 0)
2047 return r;
2048 }
2049
2050 s->io.events = events;
2051
2052 return 0;
2053 }
2054
2055 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2056 assert_return(s, -EINVAL);
2057 assert_return(revents, -EINVAL);
2058 assert_return(s->type == SOURCE_IO, -EDOM);
2059 assert_return(s->pending, -ENODATA);
2060 assert_return(!event_pid_changed(s->event), -ECHILD);
2061
2062 *revents = s->io.revents;
2063 return 0;
2064 }
2065
2066 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2067 assert_return(s, -EINVAL);
2068 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2069 assert_return(!event_pid_changed(s->event), -ECHILD);
2070
2071 return s->signal.sig;
2072 }
2073
2074 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2075 assert_return(s, -EINVAL);
2076 assert_return(!event_pid_changed(s->event), -ECHILD);
2077
2078 *priority = s->priority;
2079 return 0;
2080 }
2081
2082 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2083 bool rm_inotify = false, rm_inode = false;
2084 struct inotify_data *new_inotify_data = NULL;
2085 struct inode_data *new_inode_data = NULL;
2086 int r;
2087
2088 assert_return(s, -EINVAL);
2089 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2090 assert_return(!event_pid_changed(s->event), -ECHILD);
2091
2092 if (s->priority == priority)
2093 return 0;
2094
2095 if (s->type == SOURCE_INOTIFY) {
2096 struct inode_data *old_inode_data;
2097
2098 assert(s->inotify.inode_data);
2099 old_inode_data = s->inotify.inode_data;
2100
2101 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2102 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2103 * events we allow priority changes only until the first following iteration. */
2104 if (old_inode_data->fd < 0)
2105 return -EOPNOTSUPP;
2106
2107 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2108 if (r < 0)
2109 return r;
2110 rm_inotify = r > 0;
2111
2112 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2113 if (r < 0)
2114 goto fail;
2115 rm_inode = r > 0;
2116
2117 if (new_inode_data->fd < 0) {
2118 /* Duplicate the fd for the new inode object if we don't have any yet */
2119 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2120 if (new_inode_data->fd < 0) {
2121 r = -errno;
2122 goto fail;
2123 }
2124
2125 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2126 }
2127
2128 /* Move the event source to the new inode data structure */
2129 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2130 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2131 s->inotify.inode_data = new_inode_data;
2132
2133 /* Now create the new watch */
2134 r = inode_data_realize_watch(s->event, new_inode_data);
2135 if (r < 0) {
2136 /* Move it back */
2137 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2138 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2139 s->inotify.inode_data = old_inode_data;
2140 goto fail;
2141 }
2142
2143 s->priority = priority;
2144
2145 event_gc_inode_data(s->event, old_inode_data);
2146
2147 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2148 struct signal_data *old, *d;
2149
2150 /* Move us from the signalfd belonging to the old
2151 * priority to the signalfd of the new priority */
2152
2153 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2154
2155 s->priority = priority;
2156
2157 r = event_make_signal_data(s->event, s->signal.sig, &d);
2158 if (r < 0) {
2159 s->priority = old->priority;
2160 return r;
2161 }
2162
2163 event_unmask_signal_data(s->event, old, s->signal.sig);
2164 } else
2165 s->priority = priority;
2166
2167 if (s->pending)
2168 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2169
2170 if (s->prepare)
2171 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2172
2173 if (s->type == SOURCE_EXIT)
2174 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2175
2176 return 0;
2177
2178 fail:
2179 if (rm_inode)
2180 event_free_inode_data(s->event, new_inode_data);
2181
2182 if (rm_inotify)
2183 event_free_inotify_data(s->event, new_inotify_data);
2184
2185 return r;
2186 }
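
/* A minimal illustrative sketch: lowering the numeric priority makes a source dispatch ahead of
 * sources with the default priority of 0. The SD_EVENT_PRIORITY_* constants are assumed from the
 * public sd-event.h header.
 *
 *         r = sd_event_source_set_priority(sigterm_source, SD_EVENT_PRIORITY_IMPORTANT);
 *         if (r < 0)
 *                 return r;
 *
 * Note the restriction implemented above: for inotify sources this only works until the next loop
 * iteration, because the inode fd needed to re-create the watch is closed once the iteration
 * begins (-EOPNOTSUPP is returned after that). */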
2187
2188 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2189 assert_return(s, -EINVAL);
2190 assert_return(!event_pid_changed(s->event), -ECHILD);
2191
2192 if (m)
2193 *m = s->enabled;
2194 return s->enabled != SD_EVENT_OFF;
2195 }
2196
2197 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2198 int r;
2199
2200 assert_return(s, -EINVAL);
2201 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2202 assert_return(!event_pid_changed(s->event), -ECHILD);
2203
2204 /* If we are dead anyway, we are fine with turning off
2205 * sources, but everything else needs to fail. */
2206 if (s->event->state == SD_EVENT_FINISHED)
2207 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2208
2209 if (s->enabled == m)
2210 return 0;
2211
2212 if (m == SD_EVENT_OFF) {
2213
2214 /* Unset the pending flag when this event source is disabled */
2215 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2216 r = source_set_pending(s, false);
2217 if (r < 0)
2218 return r;
2219 }
2220
2221 switch (s->type) {
2222
2223 case SOURCE_IO:
2224 source_io_unregister(s);
2225 s->enabled = m;
2226 break;
2227
2228 case SOURCE_TIME_REALTIME:
2229 case SOURCE_TIME_BOOTTIME:
2230 case SOURCE_TIME_MONOTONIC:
2231 case SOURCE_TIME_REALTIME_ALARM:
2232 case SOURCE_TIME_BOOTTIME_ALARM: {
2233 struct clock_data *d;
2234
2235 s->enabled = m;
2236 d = event_get_clock_data(s->event, s->type);
2237 assert(d);
2238
2239 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2240 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2241 d->needs_rearm = true;
2242 break;
2243 }
2244
2245 case SOURCE_SIGNAL:
2246 s->enabled = m;
2247
2248 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2249 break;
2250
2251 case SOURCE_CHILD:
2252 s->enabled = m;
2253
2254 assert(s->event->n_enabled_child_sources > 0);
2255 s->event->n_enabled_child_sources--;
2256
2257 if (EVENT_SOURCE_WATCH_PIDFD(s))
2258 source_child_pidfd_unregister(s);
2259 else
2260 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2261
2262 break;
2263
2264 case SOURCE_EXIT:
2265 s->enabled = m;
2266 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2267 break;
2268
2269 case SOURCE_DEFER:
2270 case SOURCE_POST:
2271 case SOURCE_INOTIFY:
2272 s->enabled = m;
2273 break;
2274
2275 default:
2276 assert_not_reached("Wut? I shouldn't exist.");
2277 }
2278
2279 } else {
2280
2281 /* Unset the pending flag when this event source is enabled */
2282 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2283 r = source_set_pending(s, false);
2284 if (r < 0)
2285 return r;
2286 }
2287
2288 switch (s->type) {
2289
2290 case SOURCE_IO:
2291 r = source_io_register(s, m, s->io.events);
2292 if (r < 0)
2293 return r;
2294
2295 s->enabled = m;
2296 break;
2297
2298 case SOURCE_TIME_REALTIME:
2299 case SOURCE_TIME_BOOTTIME:
2300 case SOURCE_TIME_MONOTONIC:
2301 case SOURCE_TIME_REALTIME_ALARM:
2302 case SOURCE_TIME_BOOTTIME_ALARM: {
2303 struct clock_data *d;
2304
2305 s->enabled = m;
2306 d = event_get_clock_data(s->event, s->type);
2307 assert(d);
2308
2309 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2310 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2311 d->needs_rearm = true;
2312 break;
2313 }
2314
2315 case SOURCE_SIGNAL:
2316
2317 s->enabled = m;
2318
2319 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2320 if (r < 0) {
2321 s->enabled = SD_EVENT_OFF;
2322 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2323 return r;
2324 }
2325
2326 break;
2327
2328 case SOURCE_CHILD:
2329
2330 if (s->enabled == SD_EVENT_OFF)
2331 s->event->n_enabled_child_sources++;
2332
2333 s->enabled = m;
2334
2335 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2336 /* yes, we have pidfd */
2337
2338 r = source_child_pidfd_register(s, s->enabled);
2339 if (r < 0) {
2340 s->enabled = SD_EVENT_OFF;
2341 s->event->n_enabled_child_sources--;
2342 return r;
2343 }
2344 } else {
2345 /* no pidfd, or something other than WEXITED to watch for */
2346
2347 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2348 if (r < 0) {
2349 s->enabled = SD_EVENT_OFF;
2350 s->event->n_enabled_child_sources--;
2351 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2352 return r;
2353 }
2354 }
2355
2356 break;
2357
2358 case SOURCE_EXIT:
2359 s->enabled = m;
2360 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2361 break;
2362
2363 case SOURCE_DEFER:
2364 case SOURCE_POST:
2365 case SOURCE_INOTIFY:
2366 s->enabled = m;
2367 break;
2368
2369 default:
2370 assert_not_reached("Wut? I shouldn't exist.");
2371 }
2372 }
2373
2374 if (s->pending)
2375 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2376
2377 if (s->prepare)
2378 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2379
2380 return 0;
2381 }
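
/* A minimal illustrative sketch: SD_EVENT_ONESHOT is convenient for timers that re-arm themselves
 * from their own callback. The handler shape matches sd_event_time_handler_t from the public
 * sd-event.h API; the 5s interval is an arbitrary example.
 *
 *         static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *                 int r;
 *
 *                 // ... do the periodic work ...
 *
 *                 r = sd_event_source_set_time(s, usec + 5 * USEC_PER_SEC);
 *                 if (r < 0)
 *                         return r;
 *
 *                 return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *         }
 */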
2382
2383 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2384 assert_return(s, -EINVAL);
2385 assert_return(usec, -EINVAL);
2386 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2387 assert_return(!event_pid_changed(s->event), -ECHILD);
2388
2389 *usec = s->time.next;
2390 return 0;
2391 }
2392
2393 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2394 struct clock_data *d;
2395 int r;
2396
2397 assert_return(s, -EINVAL);
2398 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2399 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2400 assert_return(!event_pid_changed(s->event), -ECHILD);
2401
2402 r = source_set_pending(s, false);
2403 if (r < 0)
2404 return r;
2405
2406 s->time.next = usec;
2407
2408 d = event_get_clock_data(s->event, s->type);
2409 assert(d);
2410
2411 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2412 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2413 d->needs_rearm = true;
2414
2415 return 0;
2416 }
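
/* A minimal illustrative sketch: the value passed here is an absolute timestamp on the source's
 * clock, so "fire in 10s" is usually computed from the loop's cached time (sd_event_now(), see
 * below) rather than from a fresh clock_gettime():
 *
 *         uint64_t usec;
 *
 *         r = sd_event_now(event, CLOCK_MONOTONIC, &usec);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_source_set_time(timer_source, usec + 10 * USEC_PER_SEC);
 */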
2417
2418 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2419 assert_return(s, -EINVAL);
2420 assert_return(usec, -EINVAL);
2421 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2422 assert_return(!event_pid_changed(s->event), -ECHILD);
2423
2424 *usec = s->time.accuracy;
2425 return 0;
2426 }
2427
2428 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2429 struct clock_data *d;
2430 int r;
2431
2432 assert_return(s, -EINVAL);
2433 assert_return(usec != (uint64_t) -1, -EINVAL);
2434 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2435 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2436 assert_return(!event_pid_changed(s->event), -ECHILD);
2437
2438 r = source_set_pending(s, false);
2439 if (r < 0)
2440 return r;
2441
2442 if (usec == 0)
2443 usec = DEFAULT_ACCURACY_USEC;
2444
2445 s->time.accuracy = usec;
2446
2447 d = event_get_clock_data(s->event, s->type);
2448 assert(d);
2449
2450 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2451 d->needs_rearm = true;
2452
2453 return 0;
2454 }
2455
2456 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2457 assert_return(s, -EINVAL);
2458 assert_return(clock, -EINVAL);
2459 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2460 assert_return(!event_pid_changed(s->event), -ECHILD);
2461
2462 *clock = event_source_type_to_clock(s->type);
2463 return 0;
2464 }
2465
2466 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2467 assert_return(s, -EINVAL);
2468 assert_return(pid, -EINVAL);
2469 assert_return(s->type == SOURCE_CHILD, -EDOM);
2470 assert_return(!event_pid_changed(s->event), -ECHILD);
2471
2472 *pid = s->child.pid;
2473 return 0;
2474 }
2475
2476 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2477 assert_return(s, -EINVAL);
2478 assert_return(s->type == SOURCE_CHILD, -EDOM);
2479 assert_return(!event_pid_changed(s->event), -ECHILD);
2480
2481 if (s->child.pidfd < 0)
2482 return -EOPNOTSUPP;
2483
2484 return s->child.pidfd;
2485 }
2486
2487 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2488 assert_return(s, -EINVAL);
2489 assert_return(s->type == SOURCE_CHILD, -EDOM);
2490 assert_return(!event_pid_changed(s->event), -ECHILD);
2491 assert_return(SIGNAL_VALID(sig), -EINVAL);
2492
2493 /* If we have already seen an indication that the process exited, refuse sending a signal early. This way we
2494 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2495 * available. */
2496 if (s->child.exited)
2497 return -ESRCH;
2498
2499 if (s->child.pidfd >= 0) {
2500 siginfo_t copy;
2501
2502 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, hence let's copy the
2503 * structure here */
2504 if (si)
2505 copy = *si;
2506
2507 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2508 /* Let's propagate the error only if the system call is not implemented or prohibited */
2509 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2510 return -errno;
2511 } else
2512 return 0;
2513 }
2514
2515 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2516 * this here. */
2517 if (flags != 0)
2518 return -EOPNOTSUPP;
2519
2520 if (si) {
2521 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2522 siginfo_t copy = *si;
2523
2524 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2525 return -errno;
2526 } else if (kill(s->child.pid, sig) < 0)
2527 return -errno;
2528
2529 return 0;
2530 }
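
/* A minimal illustrative sketch: asking a watched child to terminate through its event source
 * rather than with a raw kill(), so that the pidfd (when available) protects against PID reuse.
 * The child_source variable is an assumed handle returned by sd_event_add_child() earlier.
 *
 *         r = sd_event_source_send_child_signal(child_source, SIGTERM, NULL, 0);
 *         if (r == -ESRCH)
 *                 log_debug("Child already exited, nothing to signal.");
 *         else if (r < 0)
 *                 return r;
 */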
2531
2532 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2533 assert_return(s, -EINVAL);
2534 assert_return(s->type == SOURCE_CHILD, -EDOM);
2535
2536 if (s->child.pidfd < 0)
2537 return -EOPNOTSUPP;
2538
2539 return s->child.pidfd_owned;
2540 }
2541
2542 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2543 assert_return(s, -EINVAL);
2544 assert_return(s->type == SOURCE_CHILD, -EDOM);
2545
2546 if (s->child.pidfd < 0)
2547 return -EOPNOTSUPP;
2548
2549 s->child.pidfd_owned = own;
2550 return 0;
2551 }
2552
2553 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2554 assert_return(s, -EINVAL);
2555 assert_return(s->type == SOURCE_CHILD, -EDOM);
2556
2557 return s->child.process_owned;
2558 }
2559
2560 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2561 assert_return(s, -EINVAL);
2562 assert_return(s->type == SOURCE_CHILD, -EDOM);
2563
2564 s->child.process_owned = own;
2565 return 0;
2566 }
2567
2568 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2569 assert_return(s, -EINVAL);
2570 assert_return(mask, -EINVAL);
2571 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2572 assert_return(!event_pid_changed(s->event), -ECHILD);
2573
2574 *mask = s->inotify.mask;
2575 return 0;
2576 }
2577
2578 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2579 int r;
2580
2581 assert_return(s, -EINVAL);
2582 assert_return(s->type != SOURCE_EXIT, -EDOM);
2583 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2584 assert_return(!event_pid_changed(s->event), -ECHILD);
2585
2586 if (s->prepare == callback)
2587 return 0;
2588
2589 if (callback && s->prepare) {
2590 s->prepare = callback;
2591 return 0;
2592 }
2593
2594 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2595 if (r < 0)
2596 return r;
2597
2598 s->prepare = callback;
2599
2600 if (callback) {
2601 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2602 if (r < 0)
2603 return r;
2604 } else
2605 prioq_remove(s->event->prepare, s, &s->prepare_index);
2606
2607 return 0;
2608 }
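
/* A minimal illustrative sketch: a prepare callback runs right before the loop polls, which makes
 * it a convenient place to recompute a timer deadline from application state. The handler shape
 * matches sd_event_handler_t; struct app and its fields are hypothetical.
 *
 *         static int on_prepare(sd_event_source *s, void *userdata) {
 *                 struct app *a = userdata;
 *
 *                 return sd_event_source_set_time(a->flush_timer, a->next_flush_usec);
 *         }
 *
 *         r = sd_event_source_set_prepare(defer_source, on_prepare);
 */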
2609
2610 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2611 assert_return(s, NULL);
2612
2613 return s->userdata;
2614 }
2615
2616 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2617 void *ret;
2618
2619 assert_return(s, NULL);
2620
2621 ret = s->userdata;
2622 s->userdata = userdata;
2623
2624 return ret;
2625 }
2626
2627 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2628 usec_t c;
2629 assert(e);
2630 assert(a <= b);
2631
2632 if (a <= 0)
2633 return 0;
2634 if (a >= USEC_INFINITY)
2635 return USEC_INFINITY;
2636
2637 if (b <= a + 1)
2638 return a;
2639
2640 initialize_perturb(e);
2641
2642 /*
2643 Find a good time to wake up again between times a and b. We
2644 have two goals here:
2645
2646 a) We want to wake up as seldom as possible, hence prefer
2647 later times over earlier times.
2648
2649 b) But if we have to wake up, then let's make sure to
2650 dispatch as much as possible on the entire system.
2651
2652 We implement this by waking up everywhere at the same time
2653 within any given minute if we can, synchronised via the
2654 perturbation value determined from the boot ID. If we can't,
2655 then we try to find the same spot in every 10s, then every 1s and
2656 then every 250ms interval. Otherwise, we pick the last possible time
2657 to wake up.
2658 */
2659
2660 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2661 if (c >= b) {
2662 if (_unlikely_(c < USEC_PER_MINUTE))
2663 return b;
2664
2665 c -= USEC_PER_MINUTE;
2666 }
2667
2668 if (c >= a)
2669 return c;
2670
2671 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2672 if (c >= b) {
2673 if (_unlikely_(c < USEC_PER_SEC*10))
2674 return b;
2675
2676 c -= USEC_PER_SEC*10;
2677 }
2678
2679 if (c >= a)
2680 return c;
2681
2682 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2683 if (c >= b) {
2684 if (_unlikely_(c < USEC_PER_SEC))
2685 return b;
2686
2687 c -= USEC_PER_SEC;
2688 }
2689
2690 if (c >= a)
2691 return c;
2692
2693 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2694 if (c >= b) {
2695 if (_unlikely_(c < USEC_PER_MSEC*250))
2696 return b;
2697
2698 c -= USEC_PER_MSEC*250;
2699 }
2700
2701 if (c >= a)
2702 return c;
2703
2704 return b;
2705 }
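
/* A worked example with assumed numbers: say the per-boot perturbation is 17s and the allowed
 * window is [boot + 2min 05s, boot + 2min 40s]. The first step computes c = 2min + 17s = 2min 17s,
 * which lies inside the window and is returned, so every loop on this machine that can do so wakes
 * at second 17 of the minute. Had the window been [2min 20s, 2min 35s], the minute-aligned spot
 * would miss it and the 10s step applies instead: with perturb % 10s = 7s the result is 2min 27s. */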
2706
2707 static int event_arm_timer(
2708 sd_event *e,
2709 struct clock_data *d) {
2710
2711 struct itimerspec its = {};
2712 sd_event_source *a, *b;
2713 usec_t t;
2714 int r;
2715
2716 assert(e);
2717 assert(d);
2718
2719 if (!d->needs_rearm)
2720 return 0;
2721 else
2722 d->needs_rearm = false;
2723
2724 a = prioq_peek(d->earliest);
2725 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2726
2727 if (d->fd < 0)
2728 return 0;
2729
2730 if (d->next == USEC_INFINITY)
2731 return 0;
2732
2733 /* disarm */
2734 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2735 if (r < 0)
2736 return r;
2737
2738 d->next = USEC_INFINITY;
2739 return 0;
2740 }
2741
2742 b = prioq_peek(d->latest);
2743 assert_se(b && b->enabled != SD_EVENT_OFF);
2744
2745 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2746 if (d->next == t)
2747 return 0;
2748
2749 assert_se(d->fd >= 0);
2750
2751 if (t == 0) {
2752 /* We don't want to disarm here, just set the timer to some time long ago. */
2753 its.it_value.tv_sec = 0;
2754 its.it_value.tv_nsec = 1;
2755 } else
2756 timespec_store(&its.it_value, t);
2757
2758 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2759 if (r < 0)
2760 return -errno;
2761
2762 d->next = t;
2763 return 0;
2764 }
2765
2766 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2767 assert(e);
2768 assert(s);
2769 assert(s->type == SOURCE_IO);
2770
2771 /* If the event source was already pending, we just OR in the
2772 * new revents, otherwise we reset the value. The ORing is
2773 * necessary to handle EPOLLONESHOT events properly where
2774 * readability might happen independently of writability, and
2775 * we need to keep track of both */
2776
2777 if (s->pending)
2778 s->io.revents |= revents;
2779 else
2780 s->io.revents = revents;
2781
2782 return source_set_pending(s, true);
2783 }
2784
2785 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2786 uint64_t x;
2787 ssize_t ss;
2788
2789 assert(e);
2790 assert(fd >= 0);
2791
2792 assert_return(events == EPOLLIN, -EIO);
2793
2794 ss = read(fd, &x, sizeof(x));
2795 if (ss < 0) {
2796 if (IN_SET(errno, EAGAIN, EINTR))
2797 return 0;
2798
2799 return -errno;
2800 }
2801
2802 if (_unlikely_(ss != sizeof(x)))
2803 return -EIO;
2804
2805 if (next)
2806 *next = USEC_INFINITY;
2807
2808 return 0;
2809 }
2810
2811 static int process_timer(
2812 sd_event *e,
2813 usec_t n,
2814 struct clock_data *d) {
2815
2816 sd_event_source *s;
2817 int r;
2818
2819 assert(e);
2820 assert(d);
2821
2822 for (;;) {
2823 s = prioq_peek(d->earliest);
2824 if (!s ||
2825 s->time.next > n ||
2826 s->enabled == SD_EVENT_OFF ||
2827 s->pending)
2828 break;
2829
2830 r = source_set_pending(s, true);
2831 if (r < 0)
2832 return r;
2833
2834 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2835 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2836 d->needs_rearm = true;
2837 }
2838
2839 return 0;
2840 }
2841
2842 static int process_child(sd_event *e) {
2843 sd_event_source *s;
2844 Iterator i;
2845 int r;
2846
2847 assert(e);
2848
2849 e->need_process_child = false;
2850
2851 /*
2852 So, this is ugly. We iteratively invoke waitid() with P_PID
2853 + WNOHANG for each PID we wait for, instead of using
2854 P_ALL. This is because we only want to get child
2855 information of very specific child processes, and not all
2856 of them. We might not have processed the SIGCHLD event of a
2857 previous invocation and we don't want to maintain an
2858 unbounded *per-child* event queue, hence we really don't
2859 want anything flushed out of the kernel's queue that we
2860 don't care about. Since this is O(n) this means that if you
2861 have a lot of processes you probably want to handle SIGCHLD
2862 yourself.
2863
2864 We do not reap the children here (by using WNOWAIT); that
2865 is only done after the event source is dispatched so that
2866 the callback still sees the process as a zombie.
2867 */
2868
2869 HASHMAP_FOREACH(s, e->child_sources, i) {
2870 assert(s->type == SOURCE_CHILD);
2871
2872 if (s->pending)
2873 continue;
2874
2875 if (s->enabled == SD_EVENT_OFF)
2876 continue;
2877
2878 if (s->child.exited)
2879 continue;
2880
2881 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
2882 continue;
2883
2884 zero(s->child.siginfo);
2885 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2886 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2887 if (r < 0)
2888 return -errno;
2889
2890 if (s->child.siginfo.si_pid != 0) {
2891 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2892
2893 if (zombie)
2894 s->child.exited = true;
2895
2896 if (!zombie && (s->child.options & WEXITED)) {
2897 /* If the child isn't dead then let's
2898 * immediately remove the state change
2899 * from the queue, since there's no
2900 * benefit in leaving it queued */
2901
2902 assert(s->child.options & (WSTOPPED|WCONTINUED));
2903 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2904 }
2905
2906 r = source_set_pending(s, true);
2907 if (r < 0)
2908 return r;
2909 }
2910 }
2911
2912 return 0;
2913 }
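
/* A minimal illustrative sketch of the consumer side of the machinery above. sd_event_add_child()
 * is assumed from the public sd-event.h API; it requires SIGCHLD to be blocked in all threads
 * before the child exists and WEXITED to be included in the options. sigprocmask_many() comes
 * from signal-util.h.
 *
 *         pid_t pid;
 *
 *         assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *
 *         pid = fork();
 *         if (pid < 0)
 *                 return -errno;
 *         if (pid == 0) {
 *                 execl("/bin/true", "true", (char*) NULL);
 *                 _exit(EXIT_FAILURE);
 *         }
 *
 *         r = sd_event_add_child(event, &child_source, pid, WEXITED, on_child_exit, NULL);
 */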
2914
2915 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2916 assert(e);
2917 assert(s);
2918 assert(s->type == SOURCE_CHILD);
2919
2920 if (s->pending)
2921 return 0;
2922
2923 if (s->enabled == SD_EVENT_OFF)
2924 return 0;
2925
2926 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2927 return 0;
2928
2929 zero(s->child.siginfo);
2930 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2931 return -errno;
2932
2933 if (s->child.siginfo.si_pid == 0)
2934 return 0;
2935
2936 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2937 s->child.exited = true;
2938
2939 return source_set_pending(s, true);
2940 }
2941
2942 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2943 bool read_one = false;
2944 int r;
2945
2946 assert(e);
2947 assert(d);
2948 assert_return(events == EPOLLIN, -EIO);
2949
2950 /* If there's a signal queued on this priority and SIGCHLD is
2951 on this priority too, then make sure to recheck the
2952 children we watch. This is because we only ever dequeue
2953 the first signal per priority, so if we dequeue one and
2954 SIGCHLD is queued behind it we wouldn't notice, even though we
2955 might have higher-priority children we care about, hence we
2956 need to check them explicitly. */
2957
2958 if (sigismember(&d->sigset, SIGCHLD))
2959 e->need_process_child = true;
2960
2961 /* If there's already an event source pending for this
2962 * priority we don't read another */
2963 if (d->current)
2964 return 0;
2965
2966 for (;;) {
2967 struct signalfd_siginfo si;
2968 ssize_t n;
2969 sd_event_source *s = NULL;
2970
2971 n = read(d->fd, &si, sizeof(si));
2972 if (n < 0) {
2973 if (IN_SET(errno, EAGAIN, EINTR))
2974 return read_one;
2975
2976 return -errno;
2977 }
2978
2979 if (_unlikely_(n != sizeof(si)))
2980 return -EIO;
2981
2982 assert(SIGNAL_VALID(si.ssi_signo));
2983
2984 read_one = true;
2985
2986 if (e->signal_sources)
2987 s = e->signal_sources[si.ssi_signo];
2988 if (!s)
2989 continue;
2990 if (s->pending)
2991 continue;
2992
2993 s->signal.siginfo = si;
2994 d->current = s;
2995
2996 r = source_set_pending(s, true);
2997 if (r < 0)
2998 return r;
2999
3000 return 1;
3001 }
3002 }
3003
3004 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3005 ssize_t n;
3006
3007 assert(e);
3008 assert(d);
3009
3010 assert_return(revents == EPOLLIN, -EIO);
3011
3012 /* If there's already an event source pending for this priority, don't read another */
3013 if (d->n_pending > 0)
3014 return 0;
3015
3016 /* Is the read buffer non-empty? If so, let's not read more */
3017 if (d->buffer_filled > 0)
3018 return 0;
3019
3020 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3021 if (n < 0) {
3022 if (IN_SET(errno, EAGAIN, EINTR))
3023 return 0;
3024
3025 return -errno;
3026 }
3027
3028 assert(n > 0);
3029 d->buffer_filled = (size_t) n;
3030 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3031
3032 return 1;
3033 }
3034
3035 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3036 assert(e);
3037 assert(d);
3038 assert(sz <= d->buffer_filled);
3039
3040 if (sz == 0)
3041 return;
3042
3043 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
3044 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3045 d->buffer_filled -= sz;
3046
3047 if (d->buffer_filled == 0)
3048 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3049 }
3050
3051 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3052 int r;
3053
3054 assert(e);
3055 assert(d);
3056
3057 /* If there's already an event source pending for this priority, don't read another */
3058 if (d->n_pending > 0)
3059 return 0;
3060
3061 while (d->buffer_filled > 0) {
3062 size_t sz;
3063
3064 /* Let's validate that the event structures are complete */
3065 if (d->buffer_filled < offsetof(struct inotify_event, name))
3066 return -EIO;
3067
3068 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3069 if (d->buffer_filled < sz)
3070 return -EIO;
3071
3072 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3073 struct inode_data *inode_data;
3074 Iterator i;
3075
3076 /* The queue overran, let's pass this event to all event sources connected to this inotify
3077 * object */
3078
3079 HASHMAP_FOREACH(inode_data, d->inodes, i) {
3080 sd_event_source *s;
3081
3082 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3083
3084 if (s->enabled == SD_EVENT_OFF)
3085 continue;
3086
3087 r = source_set_pending(s, true);
3088 if (r < 0)
3089 return r;
3090 }
3091 }
3092 } else {
3093 struct inode_data *inode_data;
3094 sd_event_source *s;
3095
3096 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3097 * our watch descriptor table. */
3098 if (d->buffer.ev.mask & IN_IGNORED) {
3099
3100 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3101 if (!inode_data) {
3102 event_inotify_data_drop(e, d, sz);
3103 continue;
3104 }
3105
3106 /* The watch descriptor was removed by the kernel, let's drop it here too */
3107 inode_data->wd = -1;
3108 } else {
3109 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3110 if (!inode_data) {
3111 event_inotify_data_drop(e, d, sz);
3112 continue;
3113 }
3114 }
3115
3116 /* Trigger all event sources that are interested in these events. Also trigger all event
3117 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3118 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3119
3120 if (s->enabled == SD_EVENT_OFF)
3121 continue;
3122
3123 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3124 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3125 continue;
3126
3127 r = source_set_pending(s, true);
3128 if (r < 0)
3129 return r;
3130 }
3131 }
3132
3133 /* Something pending now? If so, let's finish, otherwise let's read more. */
3134 if (d->n_pending > 0)
3135 return 1;
3136 }
3137
3138 return 0;
3139 }
3140
3141 static int process_inotify(sd_event *e) {
3142 struct inotify_data *d;
3143 int r, done = 0;
3144
3145 assert(e);
3146
3147 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3148 r = event_inotify_data_process(e, d);
3149 if (r < 0)
3150 return r;
3151 if (r > 0)
3152 done++;
3153 }
3154
3155 return done;
3156 }
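
/* A minimal illustrative sketch of what feeds the buffering above. sd_event_add_inotify() is
 * assumed from the public sd-event.h API; it lets a caller watch a path without managing an
 * inotify fd of its own. The path and mask are arbitrary examples.
 *
 *         static int on_changed(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *                 log_debug("inotify event mask 0x%" PRIx32 " on watched inode", ev->mask);
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_inotify(event, NULL, "/etc/example.conf", IN_MODIFY|IN_MOVE_SELF, on_changed, NULL);
 */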
3157
3158 static int source_dispatch(sd_event_source *s) {
3159 EventSourceType saved_type;
3160 int r = 0;
3161
3162 assert(s);
3163 assert(s->pending || s->type == SOURCE_EXIT);
3164
3165 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3166 * the event. */
3167 saved_type = s->type;
3168
3169 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3170 r = source_set_pending(s, false);
3171 if (r < 0)
3172 return r;
3173 }
3174
3175 if (s->type != SOURCE_POST) {
3176 sd_event_source *z;
3177 Iterator i;
3178
3179 /* If we execute a non-post source, let's mark all
3180 * post sources as pending */
3181
3182 SET_FOREACH(z, s->event->post_sources, i) {
3183 if (z->enabled == SD_EVENT_OFF)
3184 continue;
3185
3186 r = source_set_pending(z, true);
3187 if (r < 0)
3188 return r;
3189 }
3190 }
3191
3192 if (s->enabled == SD_EVENT_ONESHOT) {
3193 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3194 if (r < 0)
3195 return r;
3196 }
3197
3198 s->dispatching = true;
3199
3200 switch (s->type) {
3201
3202 case SOURCE_IO:
3203 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3204 break;
3205
3206 case SOURCE_TIME_REALTIME:
3207 case SOURCE_TIME_BOOTTIME:
3208 case SOURCE_TIME_MONOTONIC:
3209 case SOURCE_TIME_REALTIME_ALARM:
3210 case SOURCE_TIME_BOOTTIME_ALARM:
3211 r = s->time.callback(s, s->time.next, s->userdata);
3212 break;
3213
3214 case SOURCE_SIGNAL:
3215 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3216 break;
3217
3218 case SOURCE_CHILD: {
3219 bool zombie;
3220
3221 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3222
3223 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3224
3225 /* Now, reap the PID for good. */
3226 if (zombie) {
3227 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3228 s->child.waited = true;
3229 }
3230
3231 break;
3232 }
3233
3234 case SOURCE_DEFER:
3235 r = s->defer.callback(s, s->userdata);
3236 break;
3237
3238 case SOURCE_POST:
3239 r = s->post.callback(s, s->userdata);
3240 break;
3241
3242 case SOURCE_EXIT:
3243 r = s->exit.callback(s, s->userdata);
3244 break;
3245
3246 case SOURCE_INOTIFY: {
3247 struct sd_event *e = s->event;
3248 struct inotify_data *d;
3249 size_t sz;
3250
3251 assert(s->inotify.inode_data);
3252 assert_se(d = s->inotify.inode_data->inotify_data);
3253
3254 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3255 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3256 assert(d->buffer_filled >= sz);
3257
3258 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3259
3260 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3261 * buffer. */
3262 if (d->n_pending == 0)
3263 event_inotify_data_drop(e, d, sz);
3264
3265 break;
3266 }
3267
3268 case SOURCE_WATCHDOG:
3269 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3270 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3271 assert_not_reached("Wut? I shouldn't exist.");
3272 }
3273
3274 s->dispatching = false;
3275
3276 if (r < 0)
3277 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3278 strna(s->description), event_source_type_to_string(saved_type));
3279
3280 if (s->n_ref == 0)
3281 source_free(s);
3282 else if (r < 0)
3283 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3284
3285 return 1;
3286 }
3287
3288 static int event_prepare(sd_event *e) {
3289 int r;
3290
3291 assert(e);
3292
3293 for (;;) {
3294 sd_event_source *s;
3295
3296 s = prioq_peek(e->prepare);
3297 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3298 break;
3299
3300 s->prepare_iteration = e->iteration;
3301 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3302 if (r < 0)
3303 return r;
3304
3305 assert(s->prepare);
3306
3307 s->dispatching = true;
3308 r = s->prepare(s, s->userdata);
3309 s->dispatching = false;
3310
3311 if (r < 0)
3312 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3313 strna(s->description), event_source_type_to_string(s->type));
3314
3315 if (s->n_ref == 0)
3316 source_free(s);
3317 else if (r < 0)
3318 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3319 }
3320
3321 return 0;
3322 }
3323
3324 static int dispatch_exit(sd_event *e) {
3325 sd_event_source *p;
3326 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3327 int r;
3328
3329 assert(e);
3330
3331 p = prioq_peek(e->exit);
3332 if (!p || p->enabled == SD_EVENT_OFF) {
3333 e->state = SD_EVENT_FINISHED;
3334 return 0;
3335 }
3336
3337 ref = sd_event_ref(e);
3338 e->iteration++;
3339 e->state = SD_EVENT_EXITING;
3340 r = source_dispatch(p);
3341 e->state = SD_EVENT_INITIAL;
3342 return r;
3343 }
3344
3345 static sd_event_source* event_next_pending(sd_event *e) {
3346 sd_event_source *p;
3347
3348 assert(e);
3349
3350 p = prioq_peek(e->pending);
3351 if (!p)
3352 return NULL;
3353
3354 if (p->enabled == SD_EVENT_OFF)
3355 return NULL;
3356
3357 return p;
3358 }
3359
3360 static int arm_watchdog(sd_event *e) {
3361 struct itimerspec its = {};
3362 usec_t t;
3363 int r;
3364
3365 assert(e);
3366 assert(e->watchdog_fd >= 0);
3367
3368 t = sleep_between(e,
3369 e->watchdog_last + (e->watchdog_period / 2),
3370 e->watchdog_last + (e->watchdog_period * 3 / 4));
3371
3372 timespec_store(&its.it_value, t);
3373
3374 /* Make sure we never set the watchdog to 0, which tells the
3375 * kernel to disable it. */
3376 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3377 its.it_value.tv_nsec = 1;
3378
3379 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3380 if (r < 0)
3381 return -errno;
3382
3383 return 0;
3384 }
3385
3386 static int process_watchdog(sd_event *e) {
3387 assert(e);
3388
3389 if (!e->watchdog)
3390 return 0;
3391
3392 /* Don't notify watchdog too often */
3393 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3394 return 0;
3395
3396 sd_notify(false, "WATCHDOG=1");
3397 e->watchdog_last = e->timestamp.monotonic;
3398
3399 return arm_watchdog(e);
3400 }
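
/* A worked example with assumed numbers: a service started with WatchdogSec=30s gets
 * watchdog_period=30s from sd_watchdog_enabled(). process_watchdog() then refuses to ping more
 * often than every 7.5s (period/4), and arm_watchdog() schedules the next wakeup somewhere in
 * [last + 15s, last + 22.5s], i.e. comfortably before the 30s deadline, coalesced with other
 * timers via sleep_between(). */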
3401
3402 static void event_close_inode_data_fds(sd_event *e) {
3403 struct inode_data *d;
3404
3405 assert(e);
3406
3407 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3408 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3409 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3410 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3411 * compromise. */
3412
3413 while ((d = e->inode_data_to_close)) {
3414 assert(d->fd >= 0);
3415 d->fd = safe_close(d->fd);
3416
3417 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3418 }
3419 }
3420
3421 _public_ int sd_event_prepare(sd_event *e) {
3422 int r;
3423
3424 assert_return(e, -EINVAL);
3425 assert_return(e = event_resolve(e), -ENOPKG);
3426 assert_return(!event_pid_changed(e), -ECHILD);
3427 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3428 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3429
3430 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3431 * this check here once, since gettid() is typically not cached, and we thus want to minimize
3432 * syscalls */
3433 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3434
3435 if (e->exit_requested)
3436 goto pending;
3437
3438 e->iteration++;
3439
3440 e->state = SD_EVENT_PREPARING;
3441 r = event_prepare(e);
3442 e->state = SD_EVENT_INITIAL;
3443 if (r < 0)
3444 return r;
3445
3446 r = event_arm_timer(e, &e->realtime);
3447 if (r < 0)
3448 return r;
3449
3450 r = event_arm_timer(e, &e->boottime);
3451 if (r < 0)
3452 return r;
3453
3454 r = event_arm_timer(e, &e->monotonic);
3455 if (r < 0)
3456 return r;
3457
3458 r = event_arm_timer(e, &e->realtime_alarm);
3459 if (r < 0)
3460 return r;
3461
3462 r = event_arm_timer(e, &e->boottime_alarm);
3463 if (r < 0)
3464 return r;
3465
3466 event_close_inode_data_fds(e);
3467
3468 if (event_next_pending(e) || e->need_process_child)
3469 goto pending;
3470
3471 e->state = SD_EVENT_ARMED;
3472
3473 return 0;
3474
3475 pending:
3476 e->state = SD_EVENT_ARMED;
3477 r = sd_event_wait(e, 0);
3478 if (r == 0)
3479 e->state = SD_EVENT_ARMED;
3480
3481 return r;
3482 }
3483
3484 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3485 size_t event_queue_max;
3486 int r, m, i;
3487
3488 assert_return(e, -EINVAL);
3489 assert_return(e = event_resolve(e), -ENOPKG);
3490 assert_return(!event_pid_changed(e), -ECHILD);
3491 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3492 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3493
3494 if (e->exit_requested) {
3495 e->state = SD_EVENT_PENDING;
3496 return 1;
3497 }
3498
3499 event_queue_max = MAX(e->n_sources, 1u);
3500 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3501 return -ENOMEM;
3502
3503 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3504 if (e->inotify_data_buffered)
3505 timeout = 0;
3506
3507 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
3508 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3509 if (m < 0) {
3510 if (errno == EINTR) {
3511 e->state = SD_EVENT_PENDING;
3512 return 1;
3513 }
3514
3515 r = -errno;
3516 goto finish;
3517 }
3518
3519 triple_timestamp_get(&e->timestamp);
3520
3521 for (i = 0; i < m; i++) {
3522
3523 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3524 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3525 else {
3526 WakeupType *t = e->event_queue[i].data.ptr;
3527
3528 switch (*t) {
3529
3530 case WAKEUP_EVENT_SOURCE: {
3531 sd_event_source *s = e->event_queue[i].data.ptr;
3532
3533 assert(s);
3534
3535 switch (s->type) {
3536
3537 case SOURCE_IO:
3538 r = process_io(e, s, e->event_queue[i].events);
3539 break;
3540
3541 case SOURCE_CHILD:
3542 r = process_pidfd(e, s, e->event_queue[i].events);
3543 break;
3544
3545 default:
3546 assert_not_reached("Unexpected event source type");
3547 }
3548
3549 break;
3550 }
3551
3552 case WAKEUP_CLOCK_DATA: {
3553 struct clock_data *d = e->event_queue[i].data.ptr;
3554
3555 assert(d);
3556
3557 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3558 break;
3559 }
3560
3561 case WAKEUP_SIGNAL_DATA:
3562 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3563 break;
3564
3565 case WAKEUP_INOTIFY_DATA:
3566 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3567 break;
3568
3569 default:
3570 assert_not_reached("Invalid wake-up pointer");
3571 }
3572 }
3573 if (r < 0)
3574 goto finish;
3575 }
3576
3577 r = process_watchdog(e);
3578 if (r < 0)
3579 goto finish;
3580
3581 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3582 if (r < 0)
3583 goto finish;
3584
3585 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3586 if (r < 0)
3587 goto finish;
3588
3589 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3590 if (r < 0)
3591 goto finish;
3592
3593 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3594 if (r < 0)
3595 goto finish;
3596
3597 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3598 if (r < 0)
3599 goto finish;
3600
3601 if (e->need_process_child) {
3602 r = process_child(e);
3603 if (r < 0)
3604 goto finish;
3605 }
3606
3607 r = process_inotify(e);
3608 if (r < 0)
3609 goto finish;
3610
3611 if (event_next_pending(e)) {
3612 e->state = SD_EVENT_PENDING;
3613
3614 return 1;
3615 }
3616
3617 r = 0;
3618
3619 finish:
3620 e->state = SD_EVENT_INITIAL;
3621
3622 return r;
3623 }
3624
3625 _public_ int sd_event_dispatch(sd_event *e) {
3626 sd_event_source *p;
3627 int r;
3628
3629 assert_return(e, -EINVAL);
3630 assert_return(e = event_resolve(e), -ENOPKG);
3631 assert_return(!event_pid_changed(e), -ECHILD);
3632 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3633 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3634
3635 if (e->exit_requested)
3636 return dispatch_exit(e);
3637
3638 p = event_next_pending(e);
3639 if (p) {
3640 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3641
3642 ref = sd_event_ref(e);
3643 e->state = SD_EVENT_RUNNING;
3644 r = source_dispatch(p);
3645 e->state = SD_EVENT_INITIAL;
3646 return r;
3647 }
3648
3649 e->state = SD_EVENT_INITIAL;
3650
3651 return 1;
3652 }
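
/* A minimal illustrative sketch: the prepare/wait/dispatch triple above is exactly what
 * sd_event_run() below wraps. Driving it by hand is useful when embedding sd-event into a foreign
 * loop that polls sd_event_get_fd(); error handling is trimmed here.
 *
 *         for (;;) {
 *                 r = sd_event_prepare(event);
 *                 if (r == 0)
 *                         r = sd_event_wait(event, UINT64_MAX);
 *                 if (r > 0)
 *                         r = sd_event_dispatch(event);
 *                 if (r < 0)
 *                         break;
 *
 *                 if (sd_event_get_state(event) == SD_EVENT_FINISHED)
 *                         break;
 *         }
 */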
3653
3654 static void event_log_delays(sd_event *e) {
3655 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3656 size_t l, i;
3657
3658 p = b;
3659 l = sizeof(b);
3660 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3661 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3662 e->delays[i] = 0;
3663 }
3664 log_debug("Event loop iterations: %s", b);
3665 }
3666
3667 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3668 int r;
3669
3670 assert_return(e, -EINVAL);
3671 assert_return(e = event_resolve(e), -ENOPKG);
3672 assert_return(!event_pid_changed(e), -ECHILD);
3673 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3674 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3675
3676 if (e->profile_delays && e->last_run) {
3677 usec_t this_run;
3678 unsigned l;
3679
3680 this_run = now(CLOCK_MONOTONIC);
3681
3682 l = u64log2(this_run - e->last_run);
3683 assert(l < sizeof(e->delays));
3684 e->delays[l]++;
3685
3686 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3687 event_log_delays(e);
3688 e->last_log = this_run;
3689 }
3690 }
3691
3692 r = sd_event_prepare(e);
3693 if (r == 0)
3694 /* There was nothing? Then wait... */
3695 r = sd_event_wait(e, timeout);
3696
3697 if (e->profile_delays)
3698 e->last_run = now(CLOCK_MONOTONIC);
3699
3700 if (r > 0) {
3701 /* There's something now, then let's dispatch it */
3702 r = sd_event_dispatch(e);
3703 if (r < 0)
3704 return r;
3705
3706 return 1;
3707 }
3708
3709 return r;
3710 }
3711
3712 _public_ int sd_event_loop(sd_event *e) {
3713 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3714 int r;
3715
3716 assert_return(e, -EINVAL);
3717 assert_return(e = event_resolve(e), -ENOPKG);
3718 assert_return(!event_pid_changed(e), -ECHILD);
3719 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3720
3721 ref = sd_event_ref(e);
3722
3723 while (e->state != SD_EVENT_FINISHED) {
3724 r = sd_event_run(e, (uint64_t) -1);
3725 if (r < 0)
3726 return r;
3727 }
3728
3729 return e->exit_code;
3730 }
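
/* A minimal illustrative sketch of a typical consumer: acquire the per-thread default loop,
 * register a SIGTERM handler that calls sd_event_exit(), and hand control to sd_event_loop().
 * sd_event_add_signal() and sd_event_source_get_event() are assumed from the public sd-event.h
 * API, SIGTERM must be blocked before the source is added, and sigprocmask_many() comes from
 * signal-util.h.
 *
 *         static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *                 return sd_event_exit(sd_event_source_get_event(s), 0);
 *         }
 *
 *         sd_event *event = NULL;
 *
 *         assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
 *         assert_se(sd_event_default(&event) >= 0);
 *         assert_se(sd_event_add_signal(event, NULL, SIGTERM, on_sigterm, NULL) >= 0);
 *
 *         r = sd_event_loop(event);
 */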
3731
3732 _public_ int sd_event_get_fd(sd_event *e) {
3733
3734 assert_return(e, -EINVAL);
3735 assert_return(e = event_resolve(e), -ENOPKG);
3736 assert_return(!event_pid_changed(e), -ECHILD);
3737
3738 return e->epoll_fd;
3739 }
3740
3741 _public_ int sd_event_get_state(sd_event *e) {
3742 assert_return(e, -EINVAL);
3743 assert_return(e = event_resolve(e), -ENOPKG);
3744 assert_return(!event_pid_changed(e), -ECHILD);
3745
3746 return e->state;
3747 }
3748
3749 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3750 assert_return(e, -EINVAL);
3751 assert_return(e = event_resolve(e), -ENOPKG);
3752 assert_return(code, -EINVAL);
3753 assert_return(!event_pid_changed(e), -ECHILD);
3754
3755 if (!e->exit_requested)
3756 return -ENODATA;
3757
3758 *code = e->exit_code;
3759 return 0;
3760 }
3761
3762 _public_ int sd_event_exit(sd_event *e, int code) {
3763 assert_return(e, -EINVAL);
3764 assert_return(e = event_resolve(e), -ENOPKG);
3765 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3766 assert_return(!event_pid_changed(e), -ECHILD);
3767
3768 e->exit_requested = true;
3769 e->exit_code = code;
3770
3771 return 0;
3772 }
3773
3774 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3775 assert_return(e, -EINVAL);
3776 assert_return(e = event_resolve(e), -ENOPKG);
3777 assert_return(usec, -EINVAL);
3778 assert_return(!event_pid_changed(e), -ECHILD);
3779
3780 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3781 return -EOPNOTSUPP;
3782
3783 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3784 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3785 * the purpose of getting the time this doesn't matter. */
3786 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3787 return -EOPNOTSUPP;
3788
3789 if (!triple_timestamp_is_set(&e->timestamp)) {
3790 /* Implicitly fall back to now() if we never ran
3791 * before and thus have no cached time. */
3792 *usec = now(clock);
3793 return 1;
3794 }
3795
3796 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3797 return 0;
3798 }
3799
3800 _public_ int sd_event_default(sd_event **ret) {
3801 sd_event *e = NULL;
3802 int r;
3803
3804 if (!ret)
3805 return !!default_event;
3806
3807 if (default_event) {
3808 *ret = sd_event_ref(default_event);
3809 return 0;
3810 }
3811
3812 r = sd_event_new(&e);
3813 if (r < 0)
3814 return r;
3815
3816 e->default_event_ptr = &default_event;
3817 e->tid = gettid();
3818 default_event = e;
3819
3820 *ret = e;
3821 return 1;
3822 }
3823
3824 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3825 assert_return(e, -EINVAL);
3826 assert_return(e = event_resolve(e), -ENOPKG);
3827 assert_return(tid, -EINVAL);
3828 assert_return(!event_pid_changed(e), -ECHILD);
3829
3830 if (e->tid != 0) {
3831 *tid = e->tid;
3832 return 0;
3833 }
3834
3835 return -ENXIO;
3836 }
3837
3838 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3839 int r;
3840
3841 assert_return(e, -EINVAL);
3842 assert_return(e = event_resolve(e), -ENOPKG);
3843 assert_return(!event_pid_changed(e), -ECHILD);
3844
3845 if (e->watchdog == !!b)
3846 return e->watchdog;
3847
3848 if (b) {
3849 struct epoll_event ev;
3850
3851 r = sd_watchdog_enabled(false, &e->watchdog_period);
3852 if (r <= 0)
3853 return r;
3854
3855 /* Issue first ping immediately */
3856 sd_notify(false, "WATCHDOG=1");
3857 e->watchdog_last = now(CLOCK_MONOTONIC);
3858
3859 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3860 if (e->watchdog_fd < 0)
3861 return -errno;
3862
3863 r = arm_watchdog(e);
3864 if (r < 0)
3865 goto fail;
3866
3867 ev = (struct epoll_event) {
3868 .events = EPOLLIN,
3869 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3870 };
3871
3872 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3873 if (r < 0) {
3874 r = -errno;
3875 goto fail;
3876 }
3877
3878 } else {
3879 if (e->watchdog_fd >= 0) {
3880 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3881 e->watchdog_fd = safe_close(e->watchdog_fd);
3882 }
3883 }
3884
3885 e->watchdog = !!b;
3886 return e->watchdog;
3887
3888 fail:
3889 e->watchdog_fd = safe_close(e->watchdog_fd);
3890 return r;
3891 }
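
/* A minimal illustrative sketch: a service with WatchdogSec= configured in its unit file only needs
 * to opt in once; the loop then pings the service manager automatically from process_watchdog()
 * above.
 *
 *         r = sd_event_set_watchdog(event, true);
 *         if (r < 0)
 *                 log_warning_errno(r, "Failed to enable watchdog support, ignoring: %m");
 */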
3892
3893 _public_ int sd_event_get_watchdog(sd_event *e) {
3894 assert_return(e, -EINVAL);
3895 assert_return(e = event_resolve(e), -ENOPKG);
3896 assert_return(!event_pid_changed(e), -ECHILD);
3897
3898 return e->watchdog;
3899 }
3900
3901 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3902 assert_return(e, -EINVAL);
3903 assert_return(e = event_resolve(e), -ENOPKG);
3904 assert_return(!event_pid_changed(e), -ECHILD);
3905
3906 *ret = e->iteration;
3907 return 0;
3908 }
3909
3910 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3911 assert_return(s, -EINVAL);
3912
3913 s->destroy_callback = callback;
3914 return 0;
3915 }
3916
3917 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3918 assert_return(s, -EINVAL);
3919
3920 if (ret)
3921 *ret = s->destroy_callback;
3922
3923 return !!s->destroy_callback;
3924 }
3925
3926 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3927 assert_return(s, -EINVAL);
3928
3929 return s->floating;
3930 }
3931
3932 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3933 assert_return(s, -EINVAL);
3934
3935 if (s->floating == !!b)
3936 return 0;
3937
3938 if (!s->event) /* Already disconnected */
3939 return -ESTALE;
3940
3941 s->floating = b;
3942
3943 if (b) {
3944 sd_event_source_ref(s);
3945 sd_event_unref(s->event);
3946 } else {
3947 sd_event_ref(s->event);
3948 sd_event_source_unref(s);
3949 }
3950
3951 return 1;
3952 }