]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/libsystemd/sd-event/sd-event.c
sd-event: add relative timer calls
[thirdparty/systemd.git] / src / libsystemd / sd-event / sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "env-util.h"
13 #include "event-source.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "memory-util.h"
20 #include "missing_syscall.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "strxcpyx.h"
28 #include "time-util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 static bool EVENT_SOURCE_WATCH_PIDFD(sd_event_source *s) {
33 /* Returns true if this is a PID event source and can be implemented by watching EPOLLIN */
34 return s &&
35 s->type == SOURCE_CHILD &&
36 s->child.pidfd >= 0 &&
37 s->child.options == WEXITED;
38 }
39
40 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
41 [SOURCE_IO] = "io",
42 [SOURCE_TIME_REALTIME] = "realtime",
43 [SOURCE_TIME_BOOTTIME] = "bootime",
44 [SOURCE_TIME_MONOTONIC] = "monotonic",
45 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
46 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
47 [SOURCE_SIGNAL] = "signal",
48 [SOURCE_CHILD] = "child",
49 [SOURCE_DEFER] = "defer",
50 [SOURCE_POST] = "post",
51 [SOURCE_EXIT] = "exit",
52 [SOURCE_WATCHDOG] = "watchdog",
53 [SOURCE_INOTIFY] = "inotify",
54 };
55
56 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
57
58 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
59
60 struct sd_event {
61 unsigned n_ref;
62
63 int epoll_fd;
64 int watchdog_fd;
65
66 Prioq *pending;
67 Prioq *prepare;
68
69 /* timerfd_create() only supports these five clocks so far. We
70 * can add support for more clocks when the kernel learns to
71 * deal with them, too. */
72 struct clock_data realtime;
73 struct clock_data boottime;
74 struct clock_data monotonic;
75 struct clock_data realtime_alarm;
76 struct clock_data boottime_alarm;
77
78 usec_t perturb;
79
80 sd_event_source **signal_sources; /* indexed by signal number */
81 Hashmap *signal_data; /* indexed by priority */
82
83 Hashmap *child_sources;
84 unsigned n_enabled_child_sources;
85
86 Set *post_sources;
87
88 Prioq *exit;
89
90 Hashmap *inotify_data; /* indexed by priority */
91
92 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
93 LIST_HEAD(struct inode_data, inode_data_to_close);
94
95 /* A list of inotify objects that already have events buffered which aren't processed yet */
96 LIST_HEAD(struct inotify_data, inotify_data_buffered);
97
98 pid_t original_pid;
99
100 uint64_t iteration;
101 triple_timestamp timestamp;
102 int state;
103
104 bool exit_requested:1;
105 bool need_process_child:1;
106 bool watchdog:1;
107 bool profile_delays:1;
108
109 int exit_code;
110
111 pid_t tid;
112 sd_event **default_event_ptr;
113
114 usec_t watchdog_last, watchdog_period;
115
116 unsigned n_sources;
117
118 struct epoll_event *event_queue;
119 size_t event_queue_allocated;
120
121 LIST_HEAD(sd_event_source, sources);
122
123 usec_t last_run, last_log;
124 unsigned delays[sizeof(usec_t) * 8];
125 };
126
127 static thread_local sd_event *default_event = NULL;
128
129 static void source_disconnect(sd_event_source *s);
130 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
131
132 static sd_event *event_resolve(sd_event *e) {
133 return e == SD_EVENT_DEFAULT ? default_event : e;
134 }
135
136 static int pending_prioq_compare(const void *a, const void *b) {
137 const sd_event_source *x = a, *y = b;
138 int r;
139
140 assert(x->pending);
141 assert(y->pending);
142
143 /* Enabled ones first */
144 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
145 return -1;
146 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
147 return 1;
148
149 /* Lower priority values first */
150 r = CMP(x->priority, y->priority);
151 if (r != 0)
152 return r;
153
154 /* Older entries first */
155 return CMP(x->pending_iteration, y->pending_iteration);
156 }
157
158 static int prepare_prioq_compare(const void *a, const void *b) {
159 const sd_event_source *x = a, *y = b;
160 int r;
161
162 assert(x->prepare);
163 assert(y->prepare);
164
165 /* Enabled ones first */
166 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
167 return -1;
168 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
169 return 1;
170
171 /* Move most recently prepared ones last, so that we can stop
172 * preparing as soon as we hit one that has already been
173 * prepared in the current iteration */
174 r = CMP(x->prepare_iteration, y->prepare_iteration);
175 if (r != 0)
176 return r;
177
178 /* Lower priority values first */
179 return CMP(x->priority, y->priority);
180 }
181
182 static int earliest_time_prioq_compare(const void *a, const void *b) {
183 const sd_event_source *x = a, *y = b;
184
185 assert(EVENT_SOURCE_IS_TIME(x->type));
186 assert(x->type == y->type);
187
188 /* Enabled ones first */
189 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
190 return -1;
191 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
192 return 1;
193
194 /* Move the pending ones to the end */
195 if (!x->pending && y->pending)
196 return -1;
197 if (x->pending && !y->pending)
198 return 1;
199
200 /* Order by time */
201 return CMP(x->time.next, y->time.next);
202 }
203
204 static usec_t time_event_source_latest(const sd_event_source *s) {
205 return usec_add(s->time.next, s->time.accuracy);
206 }
207
208 static int latest_time_prioq_compare(const void *a, const void *b) {
209 const sd_event_source *x = a, *y = b;
210
211 assert(EVENT_SOURCE_IS_TIME(x->type));
212 assert(x->type == y->type);
213
214 /* Enabled ones first */
215 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
216 return -1;
217 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
218 return 1;
219
220 /* Move the pending ones to the end */
221 if (!x->pending && y->pending)
222 return -1;
223 if (x->pending && !y->pending)
224 return 1;
225
226 /* Order by time */
227 return CMP(time_event_source_latest(x), time_event_source_latest(y));
228 }
229
230 static int exit_prioq_compare(const void *a, const void *b) {
231 const sd_event_source *x = a, *y = b;
232
233 assert(x->type == SOURCE_EXIT);
234 assert(y->type == SOURCE_EXIT);
235
236 /* Enabled ones first */
237 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
238 return -1;
239 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
240 return 1;
241
242 /* Lower priority values first */
243 return CMP(x->priority, y->priority);
244 }
245
246 static void free_clock_data(struct clock_data *d) {
247 assert(d);
248 assert(d->wakeup == WAKEUP_CLOCK_DATA);
249
250 safe_close(d->fd);
251 prioq_free(d->earliest);
252 prioq_free(d->latest);
253 }
254
255 static sd_event *event_free(sd_event *e) {
256 sd_event_source *s;
257
258 assert(e);
259
260 while ((s = e->sources)) {
261 assert(s->floating);
262 source_disconnect(s);
263 sd_event_source_unref(s);
264 }
265
266 assert(e->n_sources == 0);
267
268 if (e->default_event_ptr)
269 *(e->default_event_ptr) = NULL;
270
271 safe_close(e->epoll_fd);
272 safe_close(e->watchdog_fd);
273
274 free_clock_data(&e->realtime);
275 free_clock_data(&e->boottime);
276 free_clock_data(&e->monotonic);
277 free_clock_data(&e->realtime_alarm);
278 free_clock_data(&e->boottime_alarm);
279
280 prioq_free(e->pending);
281 prioq_free(e->prepare);
282 prioq_free(e->exit);
283
284 free(e->signal_sources);
285 hashmap_free(e->signal_data);
286
287 hashmap_free(e->inotify_data);
288
289 hashmap_free(e->child_sources);
290 set_free(e->post_sources);
291
292 free(e->event_queue);
293
294 return mfree(e);
295 }
296
297 _public_ int sd_event_new(sd_event** ret) {
298 sd_event *e;
299 int r;
300
301 assert_return(ret, -EINVAL);
302
303 e = new(sd_event, 1);
304 if (!e)
305 return -ENOMEM;
306
307 *e = (sd_event) {
308 .n_ref = 1,
309 .epoll_fd = -1,
310 .watchdog_fd = -1,
311 .realtime.wakeup = WAKEUP_CLOCK_DATA,
312 .realtime.fd = -1,
313 .realtime.next = USEC_INFINITY,
314 .boottime.wakeup = WAKEUP_CLOCK_DATA,
315 .boottime.fd = -1,
316 .boottime.next = USEC_INFINITY,
317 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
318 .monotonic.fd = -1,
319 .monotonic.next = USEC_INFINITY,
320 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
321 .realtime_alarm.fd = -1,
322 .realtime_alarm.next = USEC_INFINITY,
323 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
324 .boottime_alarm.fd = -1,
325 .boottime_alarm.next = USEC_INFINITY,
326 .perturb = USEC_INFINITY,
327 .original_pid = getpid_cached(),
328 };
329
330 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
331 if (r < 0)
332 goto fail;
333
334 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
335 if (e->epoll_fd < 0) {
336 r = -errno;
337 goto fail;
338 }
339
340 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
341
342 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
343 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
344 e->profile_delays = true;
345 }
346
347 *ret = e;
348 return 0;
349
350 fail:
351 event_free(e);
352 return r;
353 }
354
355 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
356
357 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
358 if (s)
359 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
360 return sd_event_source_unref(s);
361 }
362
363 static bool event_pid_changed(sd_event *e) {
364 assert(e);
365
366 /* We don't support people creating an event loop and keeping
367 * it around over a fork(). Let's complain. */
368
369 return e->original_pid != getpid_cached();
370 }
371
372 static void source_io_unregister(sd_event_source *s) {
373 assert(s);
374 assert(s->type == SOURCE_IO);
375
376 if (event_pid_changed(s->event))
377 return;
378
379 if (!s->io.registered)
380 return;
381
382 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL) < 0)
383 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
384 strna(s->description), event_source_type_to_string(s->type));
385
386 s->io.registered = false;
387 }
388
389 static int source_io_register(
390 sd_event_source *s,
391 int enabled,
392 uint32_t events) {
393
394 assert(s);
395 assert(s->type == SOURCE_IO);
396 assert(enabled != SD_EVENT_OFF);
397
398 struct epoll_event ev = {
399 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
400 .data.ptr = s,
401 };
402 int r;
403
404 r = epoll_ctl(s->event->epoll_fd,
405 s->io.registered ? EPOLL_CTL_MOD : EPOLL_CTL_ADD,
406 s->io.fd,
407 &ev);
408 if (r < 0)
409 return -errno;
410
411 s->io.registered = true;
412
413 return 0;
414 }
415
416 static void source_child_pidfd_unregister(sd_event_source *s) {
417 assert(s);
418 assert(s->type == SOURCE_CHILD);
419
420 if (event_pid_changed(s->event))
421 return;
422
423 if (!s->child.registered)
424 return;
425
426 if (EVENT_SOURCE_WATCH_PIDFD(s))
427 if (epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->child.pidfd, NULL) < 0)
428 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
429 strna(s->description), event_source_type_to_string(s->type));
430
431 s->child.registered = false;
432 }
433
434 static int source_child_pidfd_register(sd_event_source *s, int enabled) {
435 int r;
436
437 assert(s);
438 assert(s->type == SOURCE_CHILD);
439 assert(enabled != SD_EVENT_OFF);
440
441 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
442 struct epoll_event ev = {
443 .events = EPOLLIN | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
444 .data.ptr = s,
445 };
446
447 if (s->child.registered)
448 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->child.pidfd, &ev);
449 else
450 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->child.pidfd, &ev);
451 if (r < 0)
452 return -errno;
453 }
454
455 s->child.registered = true;
456 return 0;
457 }
458
459 static clockid_t event_source_type_to_clock(EventSourceType t) {
460
461 switch (t) {
462
463 case SOURCE_TIME_REALTIME:
464 return CLOCK_REALTIME;
465
466 case SOURCE_TIME_BOOTTIME:
467 return CLOCK_BOOTTIME;
468
469 case SOURCE_TIME_MONOTONIC:
470 return CLOCK_MONOTONIC;
471
472 case SOURCE_TIME_REALTIME_ALARM:
473 return CLOCK_REALTIME_ALARM;
474
475 case SOURCE_TIME_BOOTTIME_ALARM:
476 return CLOCK_BOOTTIME_ALARM;
477
478 default:
479 return (clockid_t) -1;
480 }
481 }
482
483 static EventSourceType clock_to_event_source_type(clockid_t clock) {
484
485 switch (clock) {
486
487 case CLOCK_REALTIME:
488 return SOURCE_TIME_REALTIME;
489
490 case CLOCK_BOOTTIME:
491 return SOURCE_TIME_BOOTTIME;
492
493 case CLOCK_MONOTONIC:
494 return SOURCE_TIME_MONOTONIC;
495
496 case CLOCK_REALTIME_ALARM:
497 return SOURCE_TIME_REALTIME_ALARM;
498
499 case CLOCK_BOOTTIME_ALARM:
500 return SOURCE_TIME_BOOTTIME_ALARM;
501
502 default:
503 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
504 }
505 }
506
507 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
508 assert(e);
509
510 switch (t) {
511
512 case SOURCE_TIME_REALTIME:
513 return &e->realtime;
514
515 case SOURCE_TIME_BOOTTIME:
516 return &e->boottime;
517
518 case SOURCE_TIME_MONOTONIC:
519 return &e->monotonic;
520
521 case SOURCE_TIME_REALTIME_ALARM:
522 return &e->realtime_alarm;
523
524 case SOURCE_TIME_BOOTTIME_ALARM:
525 return &e->boottime_alarm;
526
527 default:
528 return NULL;
529 }
530 }
531
532 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
533 assert(e);
534
535 if (!d)
536 return;
537
538 hashmap_remove(e->signal_data, &d->priority);
539 safe_close(d->fd);
540 free(d);
541 }
542
543 static int event_make_signal_data(
544 sd_event *e,
545 int sig,
546 struct signal_data **ret) {
547
548 struct signal_data *d;
549 bool added = false;
550 sigset_t ss_copy;
551 int64_t priority;
552 int r;
553
554 assert(e);
555
556 if (event_pid_changed(e))
557 return -ECHILD;
558
559 if (e->signal_sources && e->signal_sources[sig])
560 priority = e->signal_sources[sig]->priority;
561 else
562 priority = SD_EVENT_PRIORITY_NORMAL;
563
564 d = hashmap_get(e->signal_data, &priority);
565 if (d) {
566 if (sigismember(&d->sigset, sig) > 0) {
567 if (ret)
568 *ret = d;
569 return 0;
570 }
571 } else {
572 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
573 if (r < 0)
574 return r;
575
576 d = new(struct signal_data, 1);
577 if (!d)
578 return -ENOMEM;
579
580 *d = (struct signal_data) {
581 .wakeup = WAKEUP_SIGNAL_DATA,
582 .fd = -1,
583 .priority = priority,
584 };
585
586 r = hashmap_put(e->signal_data, &d->priority, d);
587 if (r < 0) {
588 free(d);
589 return r;
590 }
591
592 added = true;
593 }
594
595 ss_copy = d->sigset;
596 assert_se(sigaddset(&ss_copy, sig) >= 0);
597
598 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
599 if (r < 0) {
600 r = -errno;
601 goto fail;
602 }
603
604 d->sigset = ss_copy;
605
606 if (d->fd >= 0) {
607 if (ret)
608 *ret = d;
609 return 0;
610 }
611
612 d->fd = fd_move_above_stdio(r);
613
614 struct epoll_event ev = {
615 .events = EPOLLIN,
616 .data.ptr = d,
617 };
618
619 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
620 if (r < 0) {
621 r = -errno;
622 goto fail;
623 }
624
625 if (ret)
626 *ret = d;
627
628 return 0;
629
630 fail:
631 if (added)
632 event_free_signal_data(e, d);
633
634 return r;
635 }
636
637 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
638 assert(e);
639 assert(d);
640
641 /* Turns off the specified signal in the signal data
642 * object. If the signal mask of the object becomes empty that
643 * way removes it. */
644
645 if (sigismember(&d->sigset, sig) == 0)
646 return;
647
648 assert_se(sigdelset(&d->sigset, sig) >= 0);
649
650 if (sigisemptyset(&d->sigset)) {
651 /* If all the mask is all-zero we can get rid of the structure */
652 event_free_signal_data(e, d);
653 return;
654 }
655
656 assert(d->fd >= 0);
657
658 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
659 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
660 }
661
662 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
663 struct signal_data *d;
664 static const int64_t zero_priority = 0;
665
666 assert(e);
667
668 /* Rechecks if the specified signal is still something we are interested in. If not, we'll unmask it,
669 * and possibly drop the signalfd for it. */
670
671 if (sig == SIGCHLD &&
672 e->n_enabled_child_sources > 0)
673 return;
674
675 if (e->signal_sources &&
676 e->signal_sources[sig] &&
677 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
678 return;
679
680 /*
681 * The specified signal might be enabled in three different queues:
682 *
683 * 1) the one that belongs to the priority passed (if it is non-NULL)
684 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
685 * 3) the 0 priority (to cover the SIGCHLD case)
686 *
687 * Hence, let's remove it from all three here.
688 */
689
690 if (priority) {
691 d = hashmap_get(e->signal_data, priority);
692 if (d)
693 event_unmask_signal_data(e, d, sig);
694 }
695
696 if (e->signal_sources && e->signal_sources[sig]) {
697 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
698 if (d)
699 event_unmask_signal_data(e, d, sig);
700 }
701
702 d = hashmap_get(e->signal_data, &zero_priority);
703 if (d)
704 event_unmask_signal_data(e, d, sig);
705 }
706
707 static void source_disconnect(sd_event_source *s) {
708 sd_event *event;
709
710 assert(s);
711
712 if (!s->event)
713 return;
714
715 assert(s->event->n_sources > 0);
716
717 switch (s->type) {
718
719 case SOURCE_IO:
720 if (s->io.fd >= 0)
721 source_io_unregister(s);
722
723 break;
724
725 case SOURCE_TIME_REALTIME:
726 case SOURCE_TIME_BOOTTIME:
727 case SOURCE_TIME_MONOTONIC:
728 case SOURCE_TIME_REALTIME_ALARM:
729 case SOURCE_TIME_BOOTTIME_ALARM: {
730 struct clock_data *d;
731
732 d = event_get_clock_data(s->event, s->type);
733 assert(d);
734
735 prioq_remove(d->earliest, s, &s->time.earliest_index);
736 prioq_remove(d->latest, s, &s->time.latest_index);
737 d->needs_rearm = true;
738 break;
739 }
740
741 case SOURCE_SIGNAL:
742 if (s->signal.sig > 0) {
743
744 if (s->event->signal_sources)
745 s->event->signal_sources[s->signal.sig] = NULL;
746
747 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
748 }
749
750 break;
751
752 case SOURCE_CHILD:
753 if (s->child.pid > 0) {
754 if (s->enabled != SD_EVENT_OFF) {
755 assert(s->event->n_enabled_child_sources > 0);
756 s->event->n_enabled_child_sources--;
757 }
758
759 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
760 }
761
762 if (EVENT_SOURCE_WATCH_PIDFD(s))
763 source_child_pidfd_unregister(s);
764 else
765 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
766
767 break;
768
769 case SOURCE_DEFER:
770 /* nothing */
771 break;
772
773 case SOURCE_POST:
774 set_remove(s->event->post_sources, s);
775 break;
776
777 case SOURCE_EXIT:
778 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
779 break;
780
781 case SOURCE_INOTIFY: {
782 struct inode_data *inode_data;
783
784 inode_data = s->inotify.inode_data;
785 if (inode_data) {
786 struct inotify_data *inotify_data;
787 assert_se(inotify_data = inode_data->inotify_data);
788
789 /* Detach this event source from the inode object */
790 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
791 s->inotify.inode_data = NULL;
792
793 if (s->pending) {
794 assert(inotify_data->n_pending > 0);
795 inotify_data->n_pending--;
796 }
797
798 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
799 * continued to being watched. That's because inotify doesn't really have an API for that: we
800 * can only change watch masks with access to the original inode either by fd or by path. But
801 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
802 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
803 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
804 * there), but given the need for open_by_handle_at() which is privileged and not universally
805 * available this would be quite an incomplete solution. Hence we go the other way, leave the
806 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
807 * anymore after reception. Yes, this sucks, but … Linux … */
808
809 /* Maybe release the inode data (and its inotify) */
810 event_gc_inode_data(s->event, inode_data);
811 }
812
813 break;
814 }
815
816 default:
817 assert_not_reached("Wut? I shouldn't exist.");
818 }
819
820 if (s->pending)
821 prioq_remove(s->event->pending, s, &s->pending_index);
822
823 if (s->prepare)
824 prioq_remove(s->event->prepare, s, &s->prepare_index);
825
826 event = TAKE_PTR(s->event);
827 LIST_REMOVE(sources, event->sources, s);
828 event->n_sources--;
829
830 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
831 * pidfd associated with this event source, which we'll do only on source_free(). */
832
833 if (!s->floating)
834 sd_event_unref(event);
835 }
836
837 static void source_free(sd_event_source *s) {
838 assert(s);
839
840 source_disconnect(s);
841
842 if (s->type == SOURCE_IO && s->io.owned)
843 s->io.fd = safe_close(s->io.fd);
844
845 if (s->type == SOURCE_CHILD) {
846 /* Eventually the kernel will do this automatically for us, but for now let's emulate this (unreliably) in userspace. */
847
848 if (s->child.process_owned) {
849
850 if (!s->child.exited) {
851 bool sent = false;
852
853 if (s->child.pidfd >= 0) {
854 if (pidfd_send_signal(s->child.pidfd, SIGKILL, NULL, 0) < 0) {
855 if (errno == ESRCH) /* Already dead */
856 sent = true;
857 else if (!ERRNO_IS_NOT_SUPPORTED(errno))
858 log_debug_errno(errno, "Failed to kill process " PID_FMT " via pidfd_send_signal(), re-trying via kill(): %m",
859 s->child.pid);
860 } else
861 sent = true;
862 }
863
864 if (!sent)
865 if (kill(s->child.pid, SIGKILL) < 0)
866 if (errno != ESRCH) /* Already dead */
867 log_debug_errno(errno, "Failed to kill process " PID_FMT " via kill(), ignoring: %m",
868 s->child.pid);
869 }
870
871 if (!s->child.waited) {
872 siginfo_t si = {};
873
874 /* Reap the child if we can */
875 (void) waitid(P_PID, s->child.pid, &si, WEXITED);
876 }
877 }
878
879 if (s->child.pidfd_owned)
880 s->child.pidfd = safe_close(s->child.pidfd);
881 }
882
883 if (s->destroy_callback)
884 s->destroy_callback(s->userdata);
885
886 free(s->description);
887 free(s);
888 }
889 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
890
891 static int source_set_pending(sd_event_source *s, bool b) {
892 int r;
893
894 assert(s);
895 assert(s->type != SOURCE_EXIT);
896
897 if (s->pending == b)
898 return 0;
899
900 s->pending = b;
901
902 if (b) {
903 s->pending_iteration = s->event->iteration;
904
905 r = prioq_put(s->event->pending, s, &s->pending_index);
906 if (r < 0) {
907 s->pending = false;
908 return r;
909 }
910 } else
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
912
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
915
916 d = event_get_clock_data(s->event, s->type);
917 assert(d);
918
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
922 }
923
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
926
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
929 d->current = NULL;
930 }
931
932 if (s->type == SOURCE_INOTIFY) {
933
934 assert(s->inotify.inode_data);
935 assert(s->inotify.inode_data->inotify_data);
936
937 if (b)
938 s->inotify.inode_data->inotify_data->n_pending ++;
939 else {
940 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
941 s->inotify.inode_data->inotify_data->n_pending --;
942 }
943 }
944
945 return 0;
946 }
947
948 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
949 sd_event_source *s;
950
951 assert(e);
952
953 s = new(sd_event_source, 1);
954 if (!s)
955 return NULL;
956
957 *s = (struct sd_event_source) {
958 .n_ref = 1,
959 .event = e,
960 .floating = floating,
961 .type = type,
962 .pending_index = PRIOQ_IDX_NULL,
963 .prepare_index = PRIOQ_IDX_NULL,
964 };
965
966 if (!floating)
967 sd_event_ref(e);
968
969 LIST_PREPEND(sources, e->sources, s);
970 e->n_sources++;
971
972 return s;
973 }
974
975 _public_ int sd_event_add_io(
976 sd_event *e,
977 sd_event_source **ret,
978 int fd,
979 uint32_t events,
980 sd_event_io_handler_t callback,
981 void *userdata) {
982
983 _cleanup_(source_freep) sd_event_source *s = NULL;
984 int r;
985
986 assert_return(e, -EINVAL);
987 assert_return(e = event_resolve(e), -ENOPKG);
988 assert_return(fd >= 0, -EBADF);
989 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
990 assert_return(callback, -EINVAL);
991 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
992 assert_return(!event_pid_changed(e), -ECHILD);
993
994 s = source_new(e, !ret, SOURCE_IO);
995 if (!s)
996 return -ENOMEM;
997
998 s->wakeup = WAKEUP_EVENT_SOURCE;
999 s->io.fd = fd;
1000 s->io.events = events;
1001 s->io.callback = callback;
1002 s->userdata = userdata;
1003 s->enabled = SD_EVENT_ON;
1004
1005 r = source_io_register(s, s->enabled, events);
1006 if (r < 0)
1007 return r;
1008
1009 if (ret)
1010 *ret = s;
1011 TAKE_PTR(s);
1012
1013 return 0;
1014 }
1015
1016 static void initialize_perturb(sd_event *e) {
1017 sd_id128_t bootid = {};
1018
1019 /* When we sleep for longer, we try to realign the wakeup to
1020 the same time within each minute/second/250ms, so that
1021 events all across the system can be coalesced into a single
1022 CPU wakeup. However, let's take some system-specific
1023 randomness for this value, so that in a network of systems
1024 with synced clocks timer events are distributed a
1025 bit. Here, we calculate a perturbation usec offset from the
1026 boot ID. */
1027
1028 if (_likely_(e->perturb != USEC_INFINITY))
1029 return;
1030
1031 if (sd_id128_get_boot(&bootid) >= 0)
1032 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1033 }
1034
1035 static int event_setup_timer_fd(
1036 sd_event *e,
1037 struct clock_data *d,
1038 clockid_t clock) {
1039
1040 assert(e);
1041 assert(d);
1042
1043 if (_likely_(d->fd >= 0))
1044 return 0;
1045
1046 _cleanup_close_ int fd = -1;
1047 int r;
1048
1049 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1050 if (fd < 0)
1051 return -errno;
1052
1053 fd = fd_move_above_stdio(fd);
1054
1055 struct epoll_event ev = {
1056 .events = EPOLLIN,
1057 .data.ptr = d,
1058 };
1059
1060 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1061 if (r < 0)
1062 return -errno;
1063
1064 d->fd = TAKE_FD(fd);
1065 return 0;
1066 }
1067
1068 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1069 assert(s);
1070
1071 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1072 }
1073
1074 _public_ int sd_event_add_time(
1075 sd_event *e,
1076 sd_event_source **ret,
1077 clockid_t clock,
1078 uint64_t usec,
1079 uint64_t accuracy,
1080 sd_event_time_handler_t callback,
1081 void *userdata) {
1082
1083 EventSourceType type;
1084 _cleanup_(source_freep) sd_event_source *s = NULL;
1085 struct clock_data *d;
1086 int r;
1087
1088 assert_return(e, -EINVAL);
1089 assert_return(e = event_resolve(e), -ENOPKG);
1090 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1091 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1092 assert_return(!event_pid_changed(e), -ECHILD);
1093
1094 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1095 return -EOPNOTSUPP;
1096
1097 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1098 if (type < 0)
1099 return -EOPNOTSUPP;
1100
1101 if (!callback)
1102 callback = time_exit_callback;
1103
1104 d = event_get_clock_data(e, type);
1105 assert(d);
1106
1107 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1108 if (r < 0)
1109 return r;
1110
1111 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1112 if (r < 0)
1113 return r;
1114
1115 if (d->fd < 0) {
1116 r = event_setup_timer_fd(e, d, clock);
1117 if (r < 0)
1118 return r;
1119 }
1120
1121 s = source_new(e, !ret, type);
1122 if (!s)
1123 return -ENOMEM;
1124
1125 s->time.next = usec;
1126 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1127 s->time.callback = callback;
1128 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1129 s->userdata = userdata;
1130 s->enabled = SD_EVENT_ONESHOT;
1131
1132 d->needs_rearm = true;
1133
1134 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1135 if (r < 0)
1136 return r;
1137
1138 r = prioq_put(d->latest, s, &s->time.latest_index);
1139 if (r < 0)
1140 return r;
1141
1142 if (ret)
1143 *ret = s;
1144 TAKE_PTR(s);
1145
1146 return 0;
1147 }
1148
1149 _public_ int sd_event_add_time_relative(
1150 sd_event *e,
1151 sd_event_source **ret,
1152 clockid_t clock,
1153 uint64_t usec,
1154 uint64_t accuracy,
1155 sd_event_time_handler_t callback,
1156 void *userdata) {
1157
1158 usec_t t;
1159 int r;
1160
1161 /* Same as sd_event_add_time() but operates relative to the event loop's current point in time, and
1162 * checks for overflow. */
1163
1164 r = sd_event_now(e, clock, &t);
1165 if (r < 0)
1166 return r;
1167
1168 if (usec >= USEC_INFINITY - t)
1169 return -EOVERFLOW;
1170
1171 return sd_event_add_time(e, ret, clock, t + usec, accuracy, callback, userdata);
1172 }
1173
1174 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1175 assert(s);
1176
1177 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1178 }
1179
1180 _public_ int sd_event_add_signal(
1181 sd_event *e,
1182 sd_event_source **ret,
1183 int sig,
1184 sd_event_signal_handler_t callback,
1185 void *userdata) {
1186
1187 _cleanup_(source_freep) sd_event_source *s = NULL;
1188 struct signal_data *d;
1189 int r;
1190
1191 assert_return(e, -EINVAL);
1192 assert_return(e = event_resolve(e), -ENOPKG);
1193 assert_return(SIGNAL_VALID(sig), -EINVAL);
1194 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1195 assert_return(!event_pid_changed(e), -ECHILD);
1196
1197 if (!callback)
1198 callback = signal_exit_callback;
1199
1200 r = signal_is_blocked(sig);
1201 if (r < 0)
1202 return r;
1203 if (r == 0)
1204 return -EBUSY;
1205
1206 if (!e->signal_sources) {
1207 e->signal_sources = new0(sd_event_source*, _NSIG);
1208 if (!e->signal_sources)
1209 return -ENOMEM;
1210 } else if (e->signal_sources[sig])
1211 return -EBUSY;
1212
1213 s = source_new(e, !ret, SOURCE_SIGNAL);
1214 if (!s)
1215 return -ENOMEM;
1216
1217 s->signal.sig = sig;
1218 s->signal.callback = callback;
1219 s->userdata = userdata;
1220 s->enabled = SD_EVENT_ON;
1221
1222 e->signal_sources[sig] = s;
1223
1224 r = event_make_signal_data(e, sig, &d);
1225 if (r < 0)
1226 return r;
1227
1228 /* Use the signal name as description for the event source by default */
1229 (void) sd_event_source_set_description(s, signal_to_string(sig));
1230
1231 if (ret)
1232 *ret = s;
1233 TAKE_PTR(s);
1234
1235 return 0;
1236 }
1237
1238 static bool shall_use_pidfd(void) {
1239 /* Mostly relevant for debugging, i.e. this is used in test-event.c to test the event loop once with and once without pidfd */
1240 return getenv_bool_secure("SYSTEMD_PIDFD") != 0;
1241 }
1242
1243 _public_ int sd_event_add_child(
1244 sd_event *e,
1245 sd_event_source **ret,
1246 pid_t pid,
1247 int options,
1248 sd_event_child_handler_t callback,
1249 void *userdata) {
1250
1251 _cleanup_(source_freep) sd_event_source *s = NULL;
1252 int r;
1253
1254 assert_return(e, -EINVAL);
1255 assert_return(e = event_resolve(e), -ENOPKG);
1256 assert_return(pid > 1, -EINVAL);
1257 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1258 assert_return(options != 0, -EINVAL);
1259 assert_return(callback, -EINVAL);
1260 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1261 assert_return(!event_pid_changed(e), -ECHILD);
1262
1263 if (e->n_enabled_child_sources == 0) {
1264 /* Caller must block SIGCHLD before using us to watch children, even if pidfd is available,
1265 * for compatibility with pre-pidfd and because we don't want the reap the child processes
1266 * ourselves, i.e. call waitid(), and don't want Linux' default internal logic for that to
1267 * take effect.
1268 *
1269 * (As an optimization we only do this check on the first child event source created.) */
1270 r = signal_is_blocked(SIGCHLD);
1271 if (r < 0)
1272 return r;
1273 if (r == 0)
1274 return -EBUSY;
1275 }
1276
1277 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1278 if (r < 0)
1279 return r;
1280
1281 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1282 return -EBUSY;
1283
1284 s = source_new(e, !ret, SOURCE_CHILD);
1285 if (!s)
1286 return -ENOMEM;
1287
1288 s->wakeup = WAKEUP_EVENT_SOURCE;
1289 s->child.pid = pid;
1290 s->child.options = options;
1291 s->child.callback = callback;
1292 s->userdata = userdata;
1293 s->enabled = SD_EVENT_ONESHOT;
1294
1295 /* We always take a pidfd here if we can, even if we wait for anything else than WEXITED, so that we
1296 * pin the PID, and make regular waitid() handling race-free. */
1297
1298 if (shall_use_pidfd()) {
1299 s->child.pidfd = pidfd_open(s->child.pid, 0);
1300 if (s->child.pidfd < 0) {
1301 /* Propagate errors unless the syscall is not supported or blocked */
1302 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
1303 return -errno;
1304 } else
1305 s->child.pidfd_owned = true; /* If we allocate the pidfd we own it by default */
1306 } else
1307 s->child.pidfd = -1;
1308
1309 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1310 if (r < 0)
1311 return r;
1312
1313 e->n_enabled_child_sources++;
1314
1315 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1316 /* We have a pidfd and we only want to watch for exit */
1317
1318 r = source_child_pidfd_register(s, s->enabled);
1319 if (r < 0) {
1320 e->n_enabled_child_sources--;
1321 return r;
1322 }
1323 } else {
1324 /* We have no pidfd or we shall wait for some other event than WEXITED */
1325
1326 r = event_make_signal_data(e, SIGCHLD, NULL);
1327 if (r < 0) {
1328 e->n_enabled_child_sources--;
1329 return r;
1330 }
1331
1332 e->need_process_child = true;
1333 }
1334
1335 if (ret)
1336 *ret = s;
1337
1338 TAKE_PTR(s);
1339 return 0;
1340 }
1341
1342 _public_ int sd_event_add_child_pidfd(
1343 sd_event *e,
1344 sd_event_source **ret,
1345 int pidfd,
1346 int options,
1347 sd_event_child_handler_t callback,
1348 void *userdata) {
1349
1350
1351 _cleanup_(source_freep) sd_event_source *s = NULL;
1352 pid_t pid;
1353 int r;
1354
1355 assert_return(e, -EINVAL);
1356 assert_return(e = event_resolve(e), -ENOPKG);
1357 assert_return(pidfd >= 0, -EBADF);
1358 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1359 assert_return(options != 0, -EINVAL);
1360 assert_return(callback, -EINVAL);
1361 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1362 assert_return(!event_pid_changed(e), -ECHILD);
1363
1364 if (e->n_enabled_child_sources == 0) {
1365 r = signal_is_blocked(SIGCHLD);
1366 if (r < 0)
1367 return r;
1368 if (r == 0)
1369 return -EBUSY;
1370 }
1371
1372 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1373 if (r < 0)
1374 return r;
1375
1376 r = pidfd_get_pid(pidfd, &pid);
1377 if (r < 0)
1378 return r;
1379
1380 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1381 return -EBUSY;
1382
1383 s = source_new(e, !ret, SOURCE_CHILD);
1384 if (!s)
1385 return -ENOMEM;
1386
1387 s->wakeup = WAKEUP_EVENT_SOURCE;
1388 s->child.pidfd = pidfd;
1389 s->child.pid = pid;
1390 s->child.options = options;
1391 s->child.callback = callback;
1392 s->child.pidfd_owned = false; /* If we got the pidfd passed in we don't own it by default (similar to the IO fd case) */
1393 s->userdata = userdata;
1394 s->enabled = SD_EVENT_ONESHOT;
1395
1396 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1397 if (r < 0)
1398 return r;
1399
1400 e->n_enabled_child_sources++;
1401
1402 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
1403 /* We only want to watch for WEXITED */
1404
1405 r = source_child_pidfd_register(s, s->enabled);
1406 if (r < 0) {
1407 e->n_enabled_child_sources--;
1408 return r;
1409 }
1410 } else {
1411 /* We shall wait for some other event than WEXITED */
1412
1413 r = event_make_signal_data(e, SIGCHLD, NULL);
1414 if (r < 0) {
1415 e->n_enabled_child_sources--;
1416 return r;
1417 }
1418
1419 e->need_process_child = true;
1420 }
1421
1422 if (ret)
1423 *ret = s;
1424
1425 TAKE_PTR(s);
1426 return 0;
1427 }
1428
1429 _public_ int sd_event_add_defer(
1430 sd_event *e,
1431 sd_event_source **ret,
1432 sd_event_handler_t callback,
1433 void *userdata) {
1434
1435 _cleanup_(source_freep) sd_event_source *s = NULL;
1436 int r;
1437
1438 assert_return(e, -EINVAL);
1439 assert_return(e = event_resolve(e), -ENOPKG);
1440 assert_return(callback, -EINVAL);
1441 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1442 assert_return(!event_pid_changed(e), -ECHILD);
1443
1444 s = source_new(e, !ret, SOURCE_DEFER);
1445 if (!s)
1446 return -ENOMEM;
1447
1448 s->defer.callback = callback;
1449 s->userdata = userdata;
1450 s->enabled = SD_EVENT_ONESHOT;
1451
1452 r = source_set_pending(s, true);
1453 if (r < 0)
1454 return r;
1455
1456 if (ret)
1457 *ret = s;
1458 TAKE_PTR(s);
1459
1460 return 0;
1461 }
1462
1463 _public_ int sd_event_add_post(
1464 sd_event *e,
1465 sd_event_source **ret,
1466 sd_event_handler_t callback,
1467 void *userdata) {
1468
1469 _cleanup_(source_freep) sd_event_source *s = NULL;
1470 int r;
1471
1472 assert_return(e, -EINVAL);
1473 assert_return(e = event_resolve(e), -ENOPKG);
1474 assert_return(callback, -EINVAL);
1475 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1476 assert_return(!event_pid_changed(e), -ECHILD);
1477
1478 s = source_new(e, !ret, SOURCE_POST);
1479 if (!s)
1480 return -ENOMEM;
1481
1482 s->post.callback = callback;
1483 s->userdata = userdata;
1484 s->enabled = SD_EVENT_ON;
1485
1486 r = set_ensure_put(&e->post_sources, NULL, s);
1487 if (r < 0)
1488 return r;
1489 assert(r > 0);
1490
1491 if (ret)
1492 *ret = s;
1493 TAKE_PTR(s);
1494
1495 return 0;
1496 }
1497
1498 _public_ int sd_event_add_exit(
1499 sd_event *e,
1500 sd_event_source **ret,
1501 sd_event_handler_t callback,
1502 void *userdata) {
1503
1504 _cleanup_(source_freep) sd_event_source *s = NULL;
1505 int r;
1506
1507 assert_return(e, -EINVAL);
1508 assert_return(e = event_resolve(e), -ENOPKG);
1509 assert_return(callback, -EINVAL);
1510 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1511 assert_return(!event_pid_changed(e), -ECHILD);
1512
1513 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1514 if (r < 0)
1515 return r;
1516
1517 s = source_new(e, !ret, SOURCE_EXIT);
1518 if (!s)
1519 return -ENOMEM;
1520
1521 s->exit.callback = callback;
1522 s->userdata = userdata;
1523 s->exit.prioq_index = PRIOQ_IDX_NULL;
1524 s->enabled = SD_EVENT_ONESHOT;
1525
1526 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1527 if (r < 0)
1528 return r;
1529
1530 if (ret)
1531 *ret = s;
1532 TAKE_PTR(s);
1533
1534 return 0;
1535 }
1536
1537 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1538 assert(e);
1539
1540 if (!d)
1541 return;
1542
1543 assert(hashmap_isempty(d->inodes));
1544 assert(hashmap_isempty(d->wd));
1545
1546 if (d->buffer_filled > 0)
1547 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1548
1549 hashmap_free(d->inodes);
1550 hashmap_free(d->wd);
1551
1552 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1553
1554 if (d->fd >= 0) {
1555 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1556 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1557
1558 safe_close(d->fd);
1559 }
1560 free(d);
1561 }
1562
1563 static int event_make_inotify_data(
1564 sd_event *e,
1565 int64_t priority,
1566 struct inotify_data **ret) {
1567
1568 _cleanup_close_ int fd = -1;
1569 struct inotify_data *d;
1570 int r;
1571
1572 assert(e);
1573
1574 d = hashmap_get(e->inotify_data, &priority);
1575 if (d) {
1576 if (ret)
1577 *ret = d;
1578 return 0;
1579 }
1580
1581 fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
1582 if (fd < 0)
1583 return -errno;
1584
1585 fd = fd_move_above_stdio(fd);
1586
1587 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1588 if (r < 0)
1589 return r;
1590
1591 d = new(struct inotify_data, 1);
1592 if (!d)
1593 return -ENOMEM;
1594
1595 *d = (struct inotify_data) {
1596 .wakeup = WAKEUP_INOTIFY_DATA,
1597 .fd = TAKE_FD(fd),
1598 .priority = priority,
1599 };
1600
1601 r = hashmap_put(e->inotify_data, &d->priority, d);
1602 if (r < 0) {
1603 d->fd = safe_close(d->fd);
1604 free(d);
1605 return r;
1606 }
1607
1608 struct epoll_event ev = {
1609 .events = EPOLLIN,
1610 .data.ptr = d,
1611 };
1612
1613 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1614 r = -errno;
1615 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1616 * remove the fd from the epoll first, which we don't want as we couldn't
1617 * add it in the first place. */
1618 event_free_inotify_data(e, d);
1619 return r;
1620 }
1621
1622 if (ret)
1623 *ret = d;
1624
1625 return 1;
1626 }
1627
1628 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1629 int r;
1630
1631 assert(x);
1632 assert(y);
1633
1634 r = CMP(x->dev, y->dev);
1635 if (r != 0)
1636 return r;
1637
1638 return CMP(x->ino, y->ino);
1639 }
1640
1641 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1642 assert(d);
1643
1644 siphash24_compress(&d->dev, sizeof(d->dev), state);
1645 siphash24_compress(&d->ino, sizeof(d->ino), state);
1646 }
1647
1648 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1649
1650 static void event_free_inode_data(
1651 sd_event *e,
1652 struct inode_data *d) {
1653
1654 assert(e);
1655
1656 if (!d)
1657 return;
1658
1659 assert(!d->event_sources);
1660
1661 if (d->fd >= 0) {
1662 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1663 safe_close(d->fd);
1664 }
1665
1666 if (d->inotify_data) {
1667
1668 if (d->wd >= 0) {
1669 if (d->inotify_data->fd >= 0) {
1670 /* So here's a problem. At the time this runs the watch descriptor might already be
1671 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1672 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1673 * likely case to happen. */
1674
1675 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1676 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1677 }
1678
1679 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1680 }
1681
1682 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1683 }
1684
1685 free(d);
1686 }
1687
1688 static void event_gc_inode_data(
1689 sd_event *e,
1690 struct inode_data *d) {
1691
1692 struct inotify_data *inotify_data;
1693
1694 assert(e);
1695
1696 if (!d)
1697 return;
1698
1699 if (d->event_sources)
1700 return;
1701
1702 inotify_data = d->inotify_data;
1703 event_free_inode_data(e, d);
1704
1705 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1706 event_free_inotify_data(e, inotify_data);
1707 }
1708
1709 static int event_make_inode_data(
1710 sd_event *e,
1711 struct inotify_data *inotify_data,
1712 dev_t dev,
1713 ino_t ino,
1714 struct inode_data **ret) {
1715
1716 struct inode_data *d, key;
1717 int r;
1718
1719 assert(e);
1720 assert(inotify_data);
1721
1722 key = (struct inode_data) {
1723 .ino = ino,
1724 .dev = dev,
1725 };
1726
1727 d = hashmap_get(inotify_data->inodes, &key);
1728 if (d) {
1729 if (ret)
1730 *ret = d;
1731
1732 return 0;
1733 }
1734
1735 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1736 if (r < 0)
1737 return r;
1738
1739 d = new(struct inode_data, 1);
1740 if (!d)
1741 return -ENOMEM;
1742
1743 *d = (struct inode_data) {
1744 .dev = dev,
1745 .ino = ino,
1746 .wd = -1,
1747 .fd = -1,
1748 .inotify_data = inotify_data,
1749 };
1750
1751 r = hashmap_put(inotify_data->inodes, d, d);
1752 if (r < 0) {
1753 free(d);
1754 return r;
1755 }
1756
1757 if (ret)
1758 *ret = d;
1759
1760 return 1;
1761 }
1762
1763 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1764 bool excl_unlink = true;
1765 uint32_t combined = 0;
1766 sd_event_source *s;
1767
1768 assert(d);
1769
1770 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1771 * the IN_EXCL_UNLINK flag is ANDed instead.
1772 *
1773 * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
1774 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1775 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1776 * events we don't care for client-side. */
1777
1778 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1779
1780 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1781 excl_unlink = false;
1782
1783 combined |= s->inotify.mask;
1784 }
1785
1786 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1787 }
1788
1789 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1790 uint32_t combined_mask;
1791 int wd, r;
1792
1793 assert(d);
1794 assert(d->fd >= 0);
1795
1796 combined_mask = inode_data_determine_mask(d);
1797
1798 if (d->wd >= 0 && combined_mask == d->combined_mask)
1799 return 0;
1800
1801 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1802 if (r < 0)
1803 return r;
1804
1805 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1806 if (wd < 0)
1807 return -errno;
1808
1809 if (d->wd < 0) {
1810 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1811 if (r < 0) {
1812 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1813 return r;
1814 }
1815
1816 d->wd = wd;
1817
1818 } else if (d->wd != wd) {
1819
1820 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1821 (void) inotify_rm_watch(d->fd, wd);
1822 return -EINVAL;
1823 }
1824
1825 d->combined_mask = combined_mask;
1826 return 1;
1827 }
1828
1829 _public_ int sd_event_add_inotify(
1830 sd_event *e,
1831 sd_event_source **ret,
1832 const char *path,
1833 uint32_t mask,
1834 sd_event_inotify_handler_t callback,
1835 void *userdata) {
1836
1837 struct inotify_data *inotify_data = NULL;
1838 struct inode_data *inode_data = NULL;
1839 _cleanup_close_ int fd = -1;
1840 _cleanup_(source_freep) sd_event_source *s = NULL;
1841 struct stat st;
1842 int r;
1843
1844 assert_return(e, -EINVAL);
1845 assert_return(e = event_resolve(e), -ENOPKG);
1846 assert_return(path, -EINVAL);
1847 assert_return(callback, -EINVAL);
1848 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1849 assert_return(!event_pid_changed(e), -ECHILD);
1850
1851 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1852 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1853 * the user can't use them for us. */
1854 if (mask & IN_MASK_ADD)
1855 return -EINVAL;
1856
1857 fd = open(path, O_PATH|O_CLOEXEC|
1858 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1859 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1860 if (fd < 0)
1861 return -errno;
1862
1863 if (fstat(fd, &st) < 0)
1864 return -errno;
1865
1866 s = source_new(e, !ret, SOURCE_INOTIFY);
1867 if (!s)
1868 return -ENOMEM;
1869
1870 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1871 s->inotify.mask = mask;
1872 s->inotify.callback = callback;
1873 s->userdata = userdata;
1874
1875 /* Allocate an inotify object for this priority, and an inode object within it */
1876 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1877 if (r < 0)
1878 return r;
1879
1880 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1881 if (r < 0) {
1882 event_free_inotify_data(e, inotify_data);
1883 return r;
1884 }
1885
1886 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1887 * the event source, until then, for which we need the original inode. */
1888 if (inode_data->fd < 0) {
1889 inode_data->fd = TAKE_FD(fd);
1890 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1891 }
1892
1893 /* Link our event source to the inode data object */
1894 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1895 s->inotify.inode_data = inode_data;
1896
1897 /* Actually realize the watch now */
1898 r = inode_data_realize_watch(e, inode_data);
1899 if (r < 0)
1900 return r;
1901
1902 (void) sd_event_source_set_description(s, path);
1903
1904 if (ret)
1905 *ret = s;
1906 TAKE_PTR(s);
1907
1908 return 0;
1909 }
1910
1911 static sd_event_source* event_source_free(sd_event_source *s) {
1912 if (!s)
1913 return NULL;
1914
1915 /* Here's a special hack: when we are called from a
1916 * dispatch handler we won't free the event source
1917 * immediately, but we will detach the fd from the
1918 * epoll. This way it is safe for the caller to unref
1919 * the event source and immediately close the fd, but
1920 * we still retain a valid event source object after
1921 * the callback. */
1922
1923 if (s->dispatching) {
1924 if (s->type == SOURCE_IO)
1925 source_io_unregister(s);
1926
1927 source_disconnect(s);
1928 } else
1929 source_free(s);
1930
1931 return NULL;
1932 }
1933
1934 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1935
1936 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1937 assert_return(s, -EINVAL);
1938 assert_return(!event_pid_changed(s->event), -ECHILD);
1939
1940 return free_and_strdup(&s->description, description);
1941 }
1942
1943 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1944 assert_return(s, -EINVAL);
1945 assert_return(description, -EINVAL);
1946 assert_return(!event_pid_changed(s->event), -ECHILD);
1947
1948 if (!s->description)
1949 return -ENXIO;
1950
1951 *description = s->description;
1952 return 0;
1953 }
1954
1955 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1956 assert_return(s, NULL);
1957
1958 return s->event;
1959 }
1960
1961 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1962 assert_return(s, -EINVAL);
1963 assert_return(s->type != SOURCE_EXIT, -EDOM);
1964 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1965 assert_return(!event_pid_changed(s->event), -ECHILD);
1966
1967 return s->pending;
1968 }
1969
1970 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1971 assert_return(s, -EINVAL);
1972 assert_return(s->type == SOURCE_IO, -EDOM);
1973 assert_return(!event_pid_changed(s->event), -ECHILD);
1974
1975 return s->io.fd;
1976 }
1977
1978 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1979 int r;
1980
1981 assert_return(s, -EINVAL);
1982 assert_return(fd >= 0, -EBADF);
1983 assert_return(s->type == SOURCE_IO, -EDOM);
1984 assert_return(!event_pid_changed(s->event), -ECHILD);
1985
1986 if (s->io.fd == fd)
1987 return 0;
1988
1989 if (s->enabled == SD_EVENT_OFF) {
1990 s->io.fd = fd;
1991 s->io.registered = false;
1992 } else {
1993 int saved_fd;
1994
1995 saved_fd = s->io.fd;
1996 assert(s->io.registered);
1997
1998 s->io.fd = fd;
1999 s->io.registered = false;
2000
2001 r = source_io_register(s, s->enabled, s->io.events);
2002 if (r < 0) {
2003 s->io.fd = saved_fd;
2004 s->io.registered = true;
2005 return r;
2006 }
2007
2008 (void) epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2009 }
2010
2011 return 0;
2012 }
2013
2014 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2015 assert_return(s, -EINVAL);
2016 assert_return(s->type == SOURCE_IO, -EDOM);
2017
2018 return s->io.owned;
2019 }
2020
2021 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2022 assert_return(s, -EINVAL);
2023 assert_return(s->type == SOURCE_IO, -EDOM);
2024
2025 s->io.owned = own;
2026 return 0;
2027 }
2028
2029 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2030 assert_return(s, -EINVAL);
2031 assert_return(events, -EINVAL);
2032 assert_return(s->type == SOURCE_IO, -EDOM);
2033 assert_return(!event_pid_changed(s->event), -ECHILD);
2034
2035 *events = s->io.events;
2036 return 0;
2037 }
2038
2039 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2040 int r;
2041
2042 assert_return(s, -EINVAL);
2043 assert_return(s->type == SOURCE_IO, -EDOM);
2044 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2045 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2046 assert_return(!event_pid_changed(s->event), -ECHILD);
2047
2048 /* edge-triggered updates are never skipped, so we can reset edges */
2049 if (s->io.events == events && !(events & EPOLLET))
2050 return 0;
2051
2052 r = source_set_pending(s, false);
2053 if (r < 0)
2054 return r;
2055
2056 if (s->enabled != SD_EVENT_OFF) {
2057 r = source_io_register(s, s->enabled, events);
2058 if (r < 0)
2059 return r;
2060 }
2061
2062 s->io.events = events;
2063
2064 return 0;
2065 }
2066
2067 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2068 assert_return(s, -EINVAL);
2069 assert_return(revents, -EINVAL);
2070 assert_return(s->type == SOURCE_IO, -EDOM);
2071 assert_return(s->pending, -ENODATA);
2072 assert_return(!event_pid_changed(s->event), -ECHILD);
2073
2074 *revents = s->io.revents;
2075 return 0;
2076 }
2077
2078 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2079 assert_return(s, -EINVAL);
2080 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2081 assert_return(!event_pid_changed(s->event), -ECHILD);
2082
2083 return s->signal.sig;
2084 }
2085
2086 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2087 assert_return(s, -EINVAL);
2088 assert_return(!event_pid_changed(s->event), -ECHILD);
2089
2090 *priority = s->priority;
2091 return 0;
2092 }
2093
2094 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2095 bool rm_inotify = false, rm_inode = false;
2096 struct inotify_data *new_inotify_data = NULL;
2097 struct inode_data *new_inode_data = NULL;
2098 int r;
2099
2100 assert_return(s, -EINVAL);
2101 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2102 assert_return(!event_pid_changed(s->event), -ECHILD);
2103
2104 if (s->priority == priority)
2105 return 0;
2106
2107 if (s->type == SOURCE_INOTIFY) {
2108 struct inode_data *old_inode_data;
2109
2110 assert(s->inotify.inode_data);
2111 old_inode_data = s->inotify.inode_data;
2112
2113 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2114 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2115 * events we allow priority changes only until the first following iteration. */
2116 if (old_inode_data->fd < 0)
2117 return -EOPNOTSUPP;
2118
2119 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2120 if (r < 0)
2121 return r;
2122 rm_inotify = r > 0;
2123
2124 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2125 if (r < 0)
2126 goto fail;
2127 rm_inode = r > 0;
2128
2129 if (new_inode_data->fd < 0) {
2130 /* Duplicate the fd for the new inode object if we don't have any yet */
2131 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2132 if (new_inode_data->fd < 0) {
2133 r = -errno;
2134 goto fail;
2135 }
2136
2137 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2138 }
2139
2140 /* Move the event source to the new inode data structure */
2141 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2142 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2143 s->inotify.inode_data = new_inode_data;
2144
2145 /* Now create the new watch */
2146 r = inode_data_realize_watch(s->event, new_inode_data);
2147 if (r < 0) {
2148 /* Move it back */
2149 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2150 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2151 s->inotify.inode_data = old_inode_data;
2152 goto fail;
2153 }
2154
2155 s->priority = priority;
2156
2157 event_gc_inode_data(s->event, old_inode_data);
2158
2159 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2160 struct signal_data *old, *d;
2161
2162 /* Move us from the signalfd belonging to the old
2163 * priority to the signalfd of the new priority */
2164
2165 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2166
2167 s->priority = priority;
2168
2169 r = event_make_signal_data(s->event, s->signal.sig, &d);
2170 if (r < 0) {
2171 s->priority = old->priority;
2172 return r;
2173 }
2174
2175 event_unmask_signal_data(s->event, old, s->signal.sig);
2176 } else
2177 s->priority = priority;
2178
2179 if (s->pending)
2180 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2181
2182 if (s->prepare)
2183 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2184
2185 if (s->type == SOURCE_EXIT)
2186 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2187
2188 return 0;
2189
2190 fail:
2191 if (rm_inode)
2192 event_free_inode_data(s->event, new_inode_data);
2193
2194 if (rm_inotify)
2195 event_free_inotify_data(s->event, new_inotify_data);
2196
2197 return r;
2198 }
2199
2200 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2201 assert_return(s, -EINVAL);
2202 assert_return(!event_pid_changed(s->event), -ECHILD);
2203
2204 if (m)
2205 *m = s->enabled;
2206 return s->enabled != SD_EVENT_OFF;
2207 }
2208
2209 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2210 int r;
2211
2212 assert_return(s, -EINVAL);
2213 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2214 assert_return(!event_pid_changed(s->event), -ECHILD);
2215
2216 /* If we are dead anyway, we are fine with turning off
2217 * sources, but everything else needs to fail. */
2218 if (s->event->state == SD_EVENT_FINISHED)
2219 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2220
2221 if (s->enabled == m)
2222 return 0;
2223
2224 if (m == SD_EVENT_OFF) {
2225
2226 /* Unset the pending flag when this event source is disabled */
2227 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2228 r = source_set_pending(s, false);
2229 if (r < 0)
2230 return r;
2231 }
2232
2233 switch (s->type) {
2234
2235 case SOURCE_IO:
2236 source_io_unregister(s);
2237 s->enabled = m;
2238 break;
2239
2240 case SOURCE_TIME_REALTIME:
2241 case SOURCE_TIME_BOOTTIME:
2242 case SOURCE_TIME_MONOTONIC:
2243 case SOURCE_TIME_REALTIME_ALARM:
2244 case SOURCE_TIME_BOOTTIME_ALARM: {
2245 struct clock_data *d;
2246
2247 s->enabled = m;
2248 d = event_get_clock_data(s->event, s->type);
2249 assert(d);
2250
2251 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2252 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2253 d->needs_rearm = true;
2254 break;
2255 }
2256
2257 case SOURCE_SIGNAL:
2258 s->enabled = m;
2259
2260 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2261 break;
2262
2263 case SOURCE_CHILD:
2264 s->enabled = m;
2265
2266 assert(s->event->n_enabled_child_sources > 0);
2267 s->event->n_enabled_child_sources--;
2268
2269 if (EVENT_SOURCE_WATCH_PIDFD(s))
2270 source_child_pidfd_unregister(s);
2271 else
2272 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2273
2274 break;
2275
2276 case SOURCE_EXIT:
2277 s->enabled = m;
2278 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2279 break;
2280
2281 case SOURCE_DEFER:
2282 case SOURCE_POST:
2283 case SOURCE_INOTIFY:
2284 s->enabled = m;
2285 break;
2286
2287 default:
2288 assert_not_reached("Wut? I shouldn't exist.");
2289 }
2290
2291 } else {
2292
2293 /* Unset the pending flag when this event source is enabled */
2294 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2295 r = source_set_pending(s, false);
2296 if (r < 0)
2297 return r;
2298 }
2299
2300 switch (s->type) {
2301
2302 case SOURCE_IO:
2303 r = source_io_register(s, m, s->io.events);
2304 if (r < 0)
2305 return r;
2306
2307 s->enabled = m;
2308 break;
2309
2310 case SOURCE_TIME_REALTIME:
2311 case SOURCE_TIME_BOOTTIME:
2312 case SOURCE_TIME_MONOTONIC:
2313 case SOURCE_TIME_REALTIME_ALARM:
2314 case SOURCE_TIME_BOOTTIME_ALARM: {
2315 struct clock_data *d;
2316
2317 s->enabled = m;
2318 d = event_get_clock_data(s->event, s->type);
2319 assert(d);
2320
2321 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2322 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2323 d->needs_rearm = true;
2324 break;
2325 }
2326
2327 case SOURCE_SIGNAL:
2328
2329 s->enabled = m;
2330
2331 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2332 if (r < 0) {
2333 s->enabled = SD_EVENT_OFF;
2334 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2335 return r;
2336 }
2337
2338 break;
2339
2340 case SOURCE_CHILD:
2341
2342 if (s->enabled == SD_EVENT_OFF)
2343 s->event->n_enabled_child_sources++;
2344
2345 s->enabled = m;
2346
2347 if (EVENT_SOURCE_WATCH_PIDFD(s)) {
2348 /* yes, we have pidfd */
2349
2350 r = source_child_pidfd_register(s, s->enabled);
2351 if (r < 0) {
2352 s->enabled = SD_EVENT_OFF;
2353 s->event->n_enabled_child_sources--;
2354 return r;
2355 }
2356 } else {
2357 /* no pidfd, or something other to watch for than WEXITED */
2358
2359 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2360 if (r < 0) {
2361 s->enabled = SD_EVENT_OFF;
2362 s->event->n_enabled_child_sources--;
2363 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2364 return r;
2365 }
2366 }
2367
2368 break;
2369
2370 case SOURCE_EXIT:
2371 s->enabled = m;
2372 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2373 break;
2374
2375 case SOURCE_DEFER:
2376 case SOURCE_POST:
2377 case SOURCE_INOTIFY:
2378 s->enabled = m;
2379 break;
2380
2381 default:
2382 assert_not_reached("Wut? I shouldn't exist.");
2383 }
2384 }
2385
2386 if (s->pending)
2387 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2388
2389 if (s->prepare)
2390 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2391
2392 return 0;
2393 }
2394
2395 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2396 assert_return(s, -EINVAL);
2397 assert_return(usec, -EINVAL);
2398 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2399 assert_return(!event_pid_changed(s->event), -ECHILD);
2400
2401 *usec = s->time.next;
2402 return 0;
2403 }
2404
2405 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2406 struct clock_data *d;
2407 int r;
2408
2409 assert_return(s, -EINVAL);
2410 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2411 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2412 assert_return(!event_pid_changed(s->event), -ECHILD);
2413
2414 r = source_set_pending(s, false);
2415 if (r < 0)
2416 return r;
2417
2418 s->time.next = usec;
2419
2420 d = event_get_clock_data(s->event, s->type);
2421 assert(d);
2422
2423 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2424 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2425 d->needs_rearm = true;
2426
2427 return 0;
2428 }
2429
2430 _public_ int sd_event_source_set_time_relative(sd_event_source *s, uint64_t usec) {
2431 usec_t t;
2432 int r;
2433
2434 assert_return(s, -EINVAL);
2435 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2436
2437 r = sd_event_now(s->event, event_source_type_to_clock(s->type), &t);
2438 if (r < 0)
2439 return r;
2440
2441 if (usec >= USEC_INFINITY - t)
2442 return -EOVERFLOW;
2443
2444 return sd_event_source_set_time(s, t + usec);
2445 }
2446
2447 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2448 assert_return(s, -EINVAL);
2449 assert_return(usec, -EINVAL);
2450 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2451 assert_return(!event_pid_changed(s->event), -ECHILD);
2452
2453 *usec = s->time.accuracy;
2454 return 0;
2455 }
2456
2457 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2458 struct clock_data *d;
2459 int r;
2460
2461 assert_return(s, -EINVAL);
2462 assert_return(usec != (uint64_t) -1, -EINVAL);
2463 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2464 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2465 assert_return(!event_pid_changed(s->event), -ECHILD);
2466
2467 r = source_set_pending(s, false);
2468 if (r < 0)
2469 return r;
2470
2471 if (usec == 0)
2472 usec = DEFAULT_ACCURACY_USEC;
2473
2474 s->time.accuracy = usec;
2475
2476 d = event_get_clock_data(s->event, s->type);
2477 assert(d);
2478
2479 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2480 d->needs_rearm = true;
2481
2482 return 0;
2483 }
2484
2485 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2486 assert_return(s, -EINVAL);
2487 assert_return(clock, -EINVAL);
2488 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2489 assert_return(!event_pid_changed(s->event), -ECHILD);
2490
2491 *clock = event_source_type_to_clock(s->type);
2492 return 0;
2493 }
2494
2495 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2496 assert_return(s, -EINVAL);
2497 assert_return(pid, -EINVAL);
2498 assert_return(s->type == SOURCE_CHILD, -EDOM);
2499 assert_return(!event_pid_changed(s->event), -ECHILD);
2500
2501 *pid = s->child.pid;
2502 return 0;
2503 }
2504
2505 _public_ int sd_event_source_get_child_pidfd(sd_event_source *s) {
2506 assert_return(s, -EINVAL);
2507 assert_return(s->type == SOURCE_CHILD, -EDOM);
2508 assert_return(!event_pid_changed(s->event), -ECHILD);
2509
2510 if (s->child.pidfd < 0)
2511 return -EOPNOTSUPP;
2512
2513 return s->child.pidfd;
2514 }
2515
2516 _public_ int sd_event_source_send_child_signal(sd_event_source *s, int sig, const siginfo_t *si, unsigned flags) {
2517 assert_return(s, -EINVAL);
2518 assert_return(s->type == SOURCE_CHILD, -EDOM);
2519 assert_return(!event_pid_changed(s->event), -ECHILD);
2520 assert_return(SIGNAL_VALID(sig), -EINVAL);
2521
2522 /* If we already have seen indication the process exited refuse sending a signal early. This way we
2523 * can be sure we don't accidentally kill the wrong process on PID reuse when pidfds are not
2524 * available. */
2525 if (s->child.exited)
2526 return -ESRCH;
2527
2528 if (s->child.pidfd >= 0) {
2529 siginfo_t copy;
2530
2531 /* pidfd_send_signal() changes the siginfo_t argument. This is weird, let's hence copy the
2532 * structure here */
2533 if (si)
2534 copy = *si;
2535
2536 if (pidfd_send_signal(s->child.pidfd, sig, si ? &copy : NULL, 0) < 0) {
2537 /* Let's propagate the error only if the system call is not implemented or prohibited */
2538 if (!ERRNO_IS_NOT_SUPPORTED(errno) && !ERRNO_IS_PRIVILEGE(errno))
2539 return -errno;
2540 } else
2541 return 0;
2542 }
2543
2544 /* Flags are only supported for pidfd_send_signal(), not for rt_sigqueueinfo(), hence let's refuse
2545 * this here. */
2546 if (flags != 0)
2547 return -EOPNOTSUPP;
2548
2549 if (si) {
2550 /* We use rt_sigqueueinfo() only if siginfo_t is specified. */
2551 siginfo_t copy = *si;
2552
2553 if (rt_sigqueueinfo(s->child.pid, sig, &copy) < 0)
2554 return -errno;
2555 } else if (kill(s->child.pid, sig) < 0)
2556 return -errno;
2557
2558 return 0;
2559 }
2560
2561 _public_ int sd_event_source_get_child_pidfd_own(sd_event_source *s) {
2562 assert_return(s, -EINVAL);
2563 assert_return(s->type == SOURCE_CHILD, -EDOM);
2564
2565 if (s->child.pidfd < 0)
2566 return -EOPNOTSUPP;
2567
2568 return s->child.pidfd_owned;
2569 }
2570
2571 _public_ int sd_event_source_set_child_pidfd_own(sd_event_source *s, int own) {
2572 assert_return(s, -EINVAL);
2573 assert_return(s->type == SOURCE_CHILD, -EDOM);
2574
2575 if (s->child.pidfd < 0)
2576 return -EOPNOTSUPP;
2577
2578 s->child.pidfd_owned = own;
2579 return 0;
2580 }
2581
2582 _public_ int sd_event_source_get_child_process_own(sd_event_source *s) {
2583 assert_return(s, -EINVAL);
2584 assert_return(s->type == SOURCE_CHILD, -EDOM);
2585
2586 return s->child.process_owned;
2587 }
2588
2589 _public_ int sd_event_source_set_child_process_own(sd_event_source *s, int own) {
2590 assert_return(s, -EINVAL);
2591 assert_return(s->type == SOURCE_CHILD, -EDOM);
2592
2593 s->child.process_owned = own;
2594 return 0;
2595 }
2596
2597 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2598 assert_return(s, -EINVAL);
2599 assert_return(mask, -EINVAL);
2600 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2601 assert_return(!event_pid_changed(s->event), -ECHILD);
2602
2603 *mask = s->inotify.mask;
2604 return 0;
2605 }
2606
2607 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2608 int r;
2609
2610 assert_return(s, -EINVAL);
2611 assert_return(s->type != SOURCE_EXIT, -EDOM);
2612 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2613 assert_return(!event_pid_changed(s->event), -ECHILD);
2614
2615 if (s->prepare == callback)
2616 return 0;
2617
2618 if (callback && s->prepare) {
2619 s->prepare = callback;
2620 return 0;
2621 }
2622
2623 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2624 if (r < 0)
2625 return r;
2626
2627 s->prepare = callback;
2628
2629 if (callback) {
2630 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2631 if (r < 0)
2632 return r;
2633 } else
2634 prioq_remove(s->event->prepare, s, &s->prepare_index);
2635
2636 return 0;
2637 }
2638
2639 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2640 assert_return(s, NULL);
2641
2642 return s->userdata;
2643 }
2644
2645 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2646 void *ret;
2647
2648 assert_return(s, NULL);
2649
2650 ret = s->userdata;
2651 s->userdata = userdata;
2652
2653 return ret;
2654 }
2655
2656 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2657 usec_t c;
2658 assert(e);
2659 assert(a <= b);
2660
2661 if (a <= 0)
2662 return 0;
2663 if (a >= USEC_INFINITY)
2664 return USEC_INFINITY;
2665
2666 if (b <= a + 1)
2667 return a;
2668
2669 initialize_perturb(e);
2670
2671 /*
2672 Find a good time to wake up again between times a and b. We
2673 have two goals here:
2674
2675 a) We want to wake up as seldom as possible, hence prefer
2676 later times over earlier times.
2677
2678 b) But if we have to wake up, then let's make sure to
2679 dispatch as much as possible on the entire system.
2680
2681 We implement this by waking up everywhere at the same time
2682 within any given minute if we can, synchronised via the
2683 perturbation value determined from the boot ID. If we can't,
2684 then we try to find the same spot in every 10s, then 1s and
2685 then 250ms step. Otherwise, we pick the last possible time
2686 to wake up.
2687 */
2688
2689 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2690 if (c >= b) {
2691 if (_unlikely_(c < USEC_PER_MINUTE))
2692 return b;
2693
2694 c -= USEC_PER_MINUTE;
2695 }
2696
2697 if (c >= a)
2698 return c;
2699
2700 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2701 if (c >= b) {
2702 if (_unlikely_(c < USEC_PER_SEC*10))
2703 return b;
2704
2705 c -= USEC_PER_SEC*10;
2706 }
2707
2708 if (c >= a)
2709 return c;
2710
2711 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2712 if (c >= b) {
2713 if (_unlikely_(c < USEC_PER_SEC))
2714 return b;
2715
2716 c -= USEC_PER_SEC;
2717 }
2718
2719 if (c >= a)
2720 return c;
2721
2722 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2723 if (c >= b) {
2724 if (_unlikely_(c < USEC_PER_MSEC*250))
2725 return b;
2726
2727 c -= USEC_PER_MSEC*250;
2728 }
2729
2730 if (c >= a)
2731 return c;
2732
2733 return b;
2734 }
2735
2736 static int event_arm_timer(
2737 sd_event *e,
2738 struct clock_data *d) {
2739
2740 struct itimerspec its = {};
2741 sd_event_source *a, *b;
2742 usec_t t;
2743 int r;
2744
2745 assert(e);
2746 assert(d);
2747
2748 if (!d->needs_rearm)
2749 return 0;
2750 else
2751 d->needs_rearm = false;
2752
2753 a = prioq_peek(d->earliest);
2754 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2755
2756 if (d->fd < 0)
2757 return 0;
2758
2759 if (d->next == USEC_INFINITY)
2760 return 0;
2761
2762 /* disarm */
2763 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2764 if (r < 0)
2765 return r;
2766
2767 d->next = USEC_INFINITY;
2768 return 0;
2769 }
2770
2771 b = prioq_peek(d->latest);
2772 assert_se(b && b->enabled != SD_EVENT_OFF);
2773
2774 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2775 if (d->next == t)
2776 return 0;
2777
2778 assert_se(d->fd >= 0);
2779
2780 if (t == 0) {
2781 /* We don' want to disarm here, just mean some time looooong ago. */
2782 its.it_value.tv_sec = 0;
2783 its.it_value.tv_nsec = 1;
2784 } else
2785 timespec_store(&its.it_value, t);
2786
2787 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2788 if (r < 0)
2789 return -errno;
2790
2791 d->next = t;
2792 return 0;
2793 }
2794
2795 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2796 assert(e);
2797 assert(s);
2798 assert(s->type == SOURCE_IO);
2799
2800 /* If the event source was already pending, we just OR in the
2801 * new revents, otherwise we reset the value. The ORing is
2802 * necessary to handle EPOLLONESHOT events properly where
2803 * readability might happen independently of writability, and
2804 * we need to keep track of both */
2805
2806 if (s->pending)
2807 s->io.revents |= revents;
2808 else
2809 s->io.revents = revents;
2810
2811 return source_set_pending(s, true);
2812 }
2813
2814 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2815 uint64_t x;
2816 ssize_t ss;
2817
2818 assert(e);
2819 assert(fd >= 0);
2820
2821 assert_return(events == EPOLLIN, -EIO);
2822
2823 ss = read(fd, &x, sizeof(x));
2824 if (ss < 0) {
2825 if (IN_SET(errno, EAGAIN, EINTR))
2826 return 0;
2827
2828 return -errno;
2829 }
2830
2831 if (_unlikely_(ss != sizeof(x)))
2832 return -EIO;
2833
2834 if (next)
2835 *next = USEC_INFINITY;
2836
2837 return 0;
2838 }
2839
2840 static int process_timer(
2841 sd_event *e,
2842 usec_t n,
2843 struct clock_data *d) {
2844
2845 sd_event_source *s;
2846 int r;
2847
2848 assert(e);
2849 assert(d);
2850
2851 for (;;) {
2852 s = prioq_peek(d->earliest);
2853 if (!s ||
2854 s->time.next > n ||
2855 s->enabled == SD_EVENT_OFF ||
2856 s->pending)
2857 break;
2858
2859 r = source_set_pending(s, true);
2860 if (r < 0)
2861 return r;
2862
2863 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2864 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2865 d->needs_rearm = true;
2866 }
2867
2868 return 0;
2869 }
2870
2871 static int process_child(sd_event *e) {
2872 sd_event_source *s;
2873 Iterator i;
2874 int r;
2875
2876 assert(e);
2877
2878 e->need_process_child = false;
2879
2880 /*
2881 So, this is ugly. We iteratively invoke waitid() with P_PID
2882 + WNOHANG for each PID we wait for, instead of using
2883 P_ALL. This is because we only want to get child
2884 information of very specific child processes, and not all
2885 of them. We might not have processed the SIGCHLD even of a
2886 previous invocation and we don't want to maintain a
2887 unbounded *per-child* event queue, hence we really don't
2888 want anything flushed out of the kernel's queue that we
2889 don't care about. Since this is O(n) this means that if you
2890 have a lot of processes you probably want to handle SIGCHLD
2891 yourself.
2892
2893 We do not reap the children here (by using WNOWAIT), this
2894 is only done after the event source is dispatched so that
2895 the callback still sees the process as a zombie.
2896 */
2897
2898 HASHMAP_FOREACH(s, e->child_sources, i) {
2899 assert(s->type == SOURCE_CHILD);
2900
2901 if (s->pending)
2902 continue;
2903
2904 if (s->enabled == SD_EVENT_OFF)
2905 continue;
2906
2907 if (s->child.exited)
2908 continue;
2909
2910 if (EVENT_SOURCE_WATCH_PIDFD(s)) /* There's a usable pidfd known for this event source? then don't waitid() for it here */
2911 continue;
2912
2913 zero(s->child.siginfo);
2914 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2915 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2916 if (r < 0)
2917 return -errno;
2918
2919 if (s->child.siginfo.si_pid != 0) {
2920 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2921
2922 if (zombie)
2923 s->child.exited = true;
2924
2925 if (!zombie && (s->child.options & WEXITED)) {
2926 /* If the child isn't dead then let's
2927 * immediately remove the state change
2928 * from the queue, since there's no
2929 * benefit in leaving it queued */
2930
2931 assert(s->child.options & (WSTOPPED|WCONTINUED));
2932 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2933 }
2934
2935 r = source_set_pending(s, true);
2936 if (r < 0)
2937 return r;
2938 }
2939 }
2940
2941 return 0;
2942 }
2943
2944 static int process_pidfd(sd_event *e, sd_event_source *s, uint32_t revents) {
2945 assert(e);
2946 assert(s);
2947 assert(s->type == SOURCE_CHILD);
2948
2949 if (s->pending)
2950 return 0;
2951
2952 if (s->enabled == SD_EVENT_OFF)
2953 return 0;
2954
2955 if (!EVENT_SOURCE_WATCH_PIDFD(s))
2956 return 0;
2957
2958 zero(s->child.siginfo);
2959 if (waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG | WNOWAIT | s->child.options) < 0)
2960 return -errno;
2961
2962 if (s->child.siginfo.si_pid == 0)
2963 return 0;
2964
2965 if (IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED))
2966 s->child.exited = true;
2967
2968 return source_set_pending(s, true);
2969 }
2970
2971 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2972 bool read_one = false;
2973 int r;
2974
2975 assert(e);
2976 assert(d);
2977 assert_return(events == EPOLLIN, -EIO);
2978
2979 /* If there's a signal queued on this priority and SIGCHLD is
2980 on this priority too, then make sure to recheck the
2981 children we watch. This is because we only ever dequeue
2982 the first signal per priority, and if we dequeue one, and
2983 SIGCHLD might be enqueued later we wouldn't know, but we
2984 might have higher priority children we care about hence we
2985 need to check that explicitly. */
2986
2987 if (sigismember(&d->sigset, SIGCHLD))
2988 e->need_process_child = true;
2989
2990 /* If there's already an event source pending for this
2991 * priority we don't read another */
2992 if (d->current)
2993 return 0;
2994
2995 for (;;) {
2996 struct signalfd_siginfo si;
2997 ssize_t n;
2998 sd_event_source *s = NULL;
2999
3000 n = read(d->fd, &si, sizeof(si));
3001 if (n < 0) {
3002 if (IN_SET(errno, EAGAIN, EINTR))
3003 return read_one;
3004
3005 return -errno;
3006 }
3007
3008 if (_unlikely_(n != sizeof(si)))
3009 return -EIO;
3010
3011 assert(SIGNAL_VALID(si.ssi_signo));
3012
3013 read_one = true;
3014
3015 if (e->signal_sources)
3016 s = e->signal_sources[si.ssi_signo];
3017 if (!s)
3018 continue;
3019 if (s->pending)
3020 continue;
3021
3022 s->signal.siginfo = si;
3023 d->current = s;
3024
3025 r = source_set_pending(s, true);
3026 if (r < 0)
3027 return r;
3028
3029 return 1;
3030 }
3031 }
3032
3033 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
3034 ssize_t n;
3035
3036 assert(e);
3037 assert(d);
3038
3039 assert_return(revents == EPOLLIN, -EIO);
3040
3041 /* If there's already an event source pending for this priority, don't read another */
3042 if (d->n_pending > 0)
3043 return 0;
3044
3045 /* Is the read buffer non-empty? If so, let's not read more */
3046 if (d->buffer_filled > 0)
3047 return 0;
3048
3049 n = read(d->fd, &d->buffer, sizeof(d->buffer));
3050 if (n < 0) {
3051 if (IN_SET(errno, EAGAIN, EINTR))
3052 return 0;
3053
3054 return -errno;
3055 }
3056
3057 assert(n > 0);
3058 d->buffer_filled = (size_t) n;
3059 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
3060
3061 return 1;
3062 }
3063
3064 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
3065 assert(e);
3066 assert(d);
3067 assert(sz <= d->buffer_filled);
3068
3069 if (sz == 0)
3070 return;
3071
3072 /* Move the rest to the buffer to the front, in order to get things properly aligned again */
3073 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
3074 d->buffer_filled -= sz;
3075
3076 if (d->buffer_filled == 0)
3077 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
3078 }
3079
3080 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
3081 int r;
3082
3083 assert(e);
3084 assert(d);
3085
3086 /* If there's already an event source pending for this priority, don't read another */
3087 if (d->n_pending > 0)
3088 return 0;
3089
3090 while (d->buffer_filled > 0) {
3091 size_t sz;
3092
3093 /* Let's validate that the event structures are complete */
3094 if (d->buffer_filled < offsetof(struct inotify_event, name))
3095 return -EIO;
3096
3097 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3098 if (d->buffer_filled < sz)
3099 return -EIO;
3100
3101 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
3102 struct inode_data *inode_data;
3103 Iterator i;
3104
3105 /* The queue overran, let's pass this event to all event sources connected to this inotify
3106 * object */
3107
3108 HASHMAP_FOREACH(inode_data, d->inodes, i) {
3109 sd_event_source *s;
3110
3111 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3112
3113 if (s->enabled == SD_EVENT_OFF)
3114 continue;
3115
3116 r = source_set_pending(s, true);
3117 if (r < 0)
3118 return r;
3119 }
3120 }
3121 } else {
3122 struct inode_data *inode_data;
3123 sd_event_source *s;
3124
3125 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3126 * our watch descriptor table. */
3127 if (d->buffer.ev.mask & IN_IGNORED) {
3128
3129 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3130 if (!inode_data) {
3131 event_inotify_data_drop(e, d, sz);
3132 continue;
3133 }
3134
3135 /* The watch descriptor was removed by the kernel, let's drop it here too */
3136 inode_data->wd = -1;
3137 } else {
3138 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3139 if (!inode_data) {
3140 event_inotify_data_drop(e, d, sz);
3141 continue;
3142 }
3143 }
3144
3145 /* Trigger all event sources that are interested in these events. Also trigger all event
3146 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3147 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3148
3149 if (s->enabled == SD_EVENT_OFF)
3150 continue;
3151
3152 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3153 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3154 continue;
3155
3156 r = source_set_pending(s, true);
3157 if (r < 0)
3158 return r;
3159 }
3160 }
3161
3162 /* Something pending now? If so, let's finish, otherwise let's read more. */
3163 if (d->n_pending > 0)
3164 return 1;
3165 }
3166
3167 return 0;
3168 }
3169
3170 static int process_inotify(sd_event *e) {
3171 struct inotify_data *d;
3172 int r, done = 0;
3173
3174 assert(e);
3175
3176 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3177 r = event_inotify_data_process(e, d);
3178 if (r < 0)
3179 return r;
3180 if (r > 0)
3181 done ++;
3182 }
3183
3184 return done;
3185 }
3186
3187 static int source_dispatch(sd_event_source *s) {
3188 EventSourceType saved_type;
3189 int r = 0;
3190
3191 assert(s);
3192 assert(s->pending || s->type == SOURCE_EXIT);
3193
3194 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
3195 * the event. */
3196 saved_type = s->type;
3197
3198 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3199 r = source_set_pending(s, false);
3200 if (r < 0)
3201 return r;
3202 }
3203
3204 if (s->type != SOURCE_POST) {
3205 sd_event_source *z;
3206 Iterator i;
3207
3208 /* If we execute a non-post source, let's mark all
3209 * post sources as pending */
3210
3211 SET_FOREACH(z, s->event->post_sources, i) {
3212 if (z->enabled == SD_EVENT_OFF)
3213 continue;
3214
3215 r = source_set_pending(z, true);
3216 if (r < 0)
3217 return r;
3218 }
3219 }
3220
3221 if (s->enabled == SD_EVENT_ONESHOT) {
3222 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3223 if (r < 0)
3224 return r;
3225 }
3226
3227 s->dispatching = true;
3228
3229 switch (s->type) {
3230
3231 case SOURCE_IO:
3232 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3233 break;
3234
3235 case SOURCE_TIME_REALTIME:
3236 case SOURCE_TIME_BOOTTIME:
3237 case SOURCE_TIME_MONOTONIC:
3238 case SOURCE_TIME_REALTIME_ALARM:
3239 case SOURCE_TIME_BOOTTIME_ALARM:
3240 r = s->time.callback(s, s->time.next, s->userdata);
3241 break;
3242
3243 case SOURCE_SIGNAL:
3244 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3245 break;
3246
3247 case SOURCE_CHILD: {
3248 bool zombie;
3249
3250 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3251
3252 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3253
3254 /* Now, reap the PID for good. */
3255 if (zombie) {
3256 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3257 s->child.waited = true;
3258 }
3259
3260 break;
3261 }
3262
3263 case SOURCE_DEFER:
3264 r = s->defer.callback(s, s->userdata);
3265 break;
3266
3267 case SOURCE_POST:
3268 r = s->post.callback(s, s->userdata);
3269 break;
3270
3271 case SOURCE_EXIT:
3272 r = s->exit.callback(s, s->userdata);
3273 break;
3274
3275 case SOURCE_INOTIFY: {
3276 struct sd_event *e = s->event;
3277 struct inotify_data *d;
3278 size_t sz;
3279
3280 assert(s->inotify.inode_data);
3281 assert_se(d = s->inotify.inode_data->inotify_data);
3282
3283 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3284 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3285 assert(d->buffer_filled >= sz);
3286
3287 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3288
3289 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3290 * buffer. */
3291 if (d->n_pending == 0)
3292 event_inotify_data_drop(e, d, sz);
3293
3294 break;
3295 }
3296
3297 case SOURCE_WATCHDOG:
3298 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3299 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3300 assert_not_reached("Wut? I shouldn't exist.");
3301 }
3302
3303 s->dispatching = false;
3304
3305 if (r < 0)
3306 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3307 strna(s->description), event_source_type_to_string(saved_type));
3308
3309 if (s->n_ref == 0)
3310 source_free(s);
3311 else if (r < 0)
3312 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3313
3314 return 1;
3315 }
3316
3317 static int event_prepare(sd_event *e) {
3318 int r;
3319
3320 assert(e);
3321
3322 for (;;) {
3323 sd_event_source *s;
3324
3325 s = prioq_peek(e->prepare);
3326 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3327 break;
3328
3329 s->prepare_iteration = e->iteration;
3330 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3331 if (r < 0)
3332 return r;
3333
3334 assert(s->prepare);
3335
3336 s->dispatching = true;
3337 r = s->prepare(s, s->userdata);
3338 s->dispatching = false;
3339
3340 if (r < 0)
3341 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3342 strna(s->description), event_source_type_to_string(s->type));
3343
3344 if (s->n_ref == 0)
3345 source_free(s);
3346 else if (r < 0)
3347 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3348 }
3349
3350 return 0;
3351 }
3352
3353 static int dispatch_exit(sd_event *e) {
3354 sd_event_source *p;
3355 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3356 int r;
3357
3358 assert(e);
3359
3360 p = prioq_peek(e->exit);
3361 if (!p || p->enabled == SD_EVENT_OFF) {
3362 e->state = SD_EVENT_FINISHED;
3363 return 0;
3364 }
3365
3366 ref = sd_event_ref(e);
3367 e->iteration++;
3368 e->state = SD_EVENT_EXITING;
3369 r = source_dispatch(p);
3370 e->state = SD_EVENT_INITIAL;
3371 return r;
3372 }
3373
3374 static sd_event_source* event_next_pending(sd_event *e) {
3375 sd_event_source *p;
3376
3377 assert(e);
3378
3379 p = prioq_peek(e->pending);
3380 if (!p)
3381 return NULL;
3382
3383 if (p->enabled == SD_EVENT_OFF)
3384 return NULL;
3385
3386 return p;
3387 }
3388
3389 static int arm_watchdog(sd_event *e) {
3390 struct itimerspec its = {};
3391 usec_t t;
3392 int r;
3393
3394 assert(e);
3395 assert(e->watchdog_fd >= 0);
3396
3397 t = sleep_between(e,
3398 e->watchdog_last + (e->watchdog_period / 2),
3399 e->watchdog_last + (e->watchdog_period * 3 / 4));
3400
3401 timespec_store(&its.it_value, t);
3402
3403 /* Make sure we never set the watchdog to 0, which tells the
3404 * kernel to disable it. */
3405 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3406 its.it_value.tv_nsec = 1;
3407
3408 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3409 if (r < 0)
3410 return -errno;
3411
3412 return 0;
3413 }
3414
3415 static int process_watchdog(sd_event *e) {
3416 assert(e);
3417
3418 if (!e->watchdog)
3419 return 0;
3420
3421 /* Don't notify watchdog too often */
3422 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3423 return 0;
3424
3425 sd_notify(false, "WATCHDOG=1");
3426 e->watchdog_last = e->timestamp.monotonic;
3427
3428 return arm_watchdog(e);
3429 }
3430
3431 static void event_close_inode_data_fds(sd_event *e) {
3432 struct inode_data *d;
3433
3434 assert(e);
3435
3436 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3437 * filesystems. But we can't close them right-away as we need them as long as the user still wants to make
3438 * adjustments to the even source, such as changing the priority (which requires us to remove and re-add a watch
3439 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3440 * compromise. */
3441
3442 while ((d = e->inode_data_to_close)) {
3443 assert(d->fd >= 0);
3444 d->fd = safe_close(d->fd);
3445
3446 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3447 }
3448 }
3449
3450 _public_ int sd_event_prepare(sd_event *e) {
3451 int r;
3452
3453 assert_return(e, -EINVAL);
3454 assert_return(e = event_resolve(e), -ENOPKG);
3455 assert_return(!event_pid_changed(e), -ECHILD);
3456 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3457 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3458
3459 /* Let's check that if we are a default event loop we are executed in the correct thread. We only do
3460 * this check here once, since gettid() is typically not cached, and thus want to minimize
3461 * syscalls */
3462 assert_return(!e->default_event_ptr || e->tid == gettid(), -EREMOTEIO);
3463
3464 if (e->exit_requested)
3465 goto pending;
3466
3467 e->iteration++;
3468
3469 e->state = SD_EVENT_PREPARING;
3470 r = event_prepare(e);
3471 e->state = SD_EVENT_INITIAL;
3472 if (r < 0)
3473 return r;
3474
3475 r = event_arm_timer(e, &e->realtime);
3476 if (r < 0)
3477 return r;
3478
3479 r = event_arm_timer(e, &e->boottime);
3480 if (r < 0)
3481 return r;
3482
3483 r = event_arm_timer(e, &e->monotonic);
3484 if (r < 0)
3485 return r;
3486
3487 r = event_arm_timer(e, &e->realtime_alarm);
3488 if (r < 0)
3489 return r;
3490
3491 r = event_arm_timer(e, &e->boottime_alarm);
3492 if (r < 0)
3493 return r;
3494
3495 event_close_inode_data_fds(e);
3496
3497 if (event_next_pending(e) || e->need_process_child)
3498 goto pending;
3499
3500 e->state = SD_EVENT_ARMED;
3501
3502 return 0;
3503
3504 pending:
3505 e->state = SD_EVENT_ARMED;
3506 r = sd_event_wait(e, 0);
3507 if (r == 0)
3508 e->state = SD_EVENT_ARMED;
3509
3510 return r;
3511 }
3512
3513 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3514 size_t event_queue_max;
3515 int r, m, i;
3516
3517 assert_return(e, -EINVAL);
3518 assert_return(e = event_resolve(e), -ENOPKG);
3519 assert_return(!event_pid_changed(e), -ECHILD);
3520 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3521 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3522
3523 if (e->exit_requested) {
3524 e->state = SD_EVENT_PENDING;
3525 return 1;
3526 }
3527
3528 event_queue_max = MAX(e->n_sources, 1u);
3529 if (!GREEDY_REALLOC(e->event_queue, e->event_queue_allocated, event_queue_max))
3530 return -ENOMEM;
3531
3532 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3533 if (e->inotify_data_buffered)
3534 timeout = 0;
3535
3536 m = epoll_wait(e->epoll_fd, e->event_queue, event_queue_max,
3537 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3538 if (m < 0) {
3539 if (errno == EINTR) {
3540 e->state = SD_EVENT_PENDING;
3541 return 1;
3542 }
3543
3544 r = -errno;
3545 goto finish;
3546 }
3547
3548 triple_timestamp_get(&e->timestamp);
3549
3550 for (i = 0; i < m; i++) {
3551
3552 if (e->event_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3553 r = flush_timer(e, e->watchdog_fd, e->event_queue[i].events, NULL);
3554 else {
3555 WakeupType *t = e->event_queue[i].data.ptr;
3556
3557 switch (*t) {
3558
3559 case WAKEUP_EVENT_SOURCE: {
3560 sd_event_source *s = e->event_queue[i].data.ptr;
3561
3562 assert(s);
3563
3564 switch (s->type) {
3565
3566 case SOURCE_IO:
3567 r = process_io(e, s, e->event_queue[i].events);
3568 break;
3569
3570 case SOURCE_CHILD:
3571 r = process_pidfd(e, s, e->event_queue[i].events);
3572 break;
3573
3574 default:
3575 assert_not_reached("Unexpected event source type");
3576 }
3577
3578 break;
3579 }
3580
3581 case WAKEUP_CLOCK_DATA: {
3582 struct clock_data *d = e->event_queue[i].data.ptr;
3583
3584 assert(d);
3585
3586 r = flush_timer(e, d->fd, e->event_queue[i].events, &d->next);
3587 break;
3588 }
3589
3590 case WAKEUP_SIGNAL_DATA:
3591 r = process_signal(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3592 break;
3593
3594 case WAKEUP_INOTIFY_DATA:
3595 r = event_inotify_data_read(e, e->event_queue[i].data.ptr, e->event_queue[i].events);
3596 break;
3597
3598 default:
3599 assert_not_reached("Invalid wake-up pointer");
3600 }
3601 }
3602 if (r < 0)
3603 goto finish;
3604 }
3605
3606 r = process_watchdog(e);
3607 if (r < 0)
3608 goto finish;
3609
3610 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3611 if (r < 0)
3612 goto finish;
3613
3614 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3615 if (r < 0)
3616 goto finish;
3617
3618 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3619 if (r < 0)
3620 goto finish;
3621
3622 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3623 if (r < 0)
3624 goto finish;
3625
3626 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3627 if (r < 0)
3628 goto finish;
3629
3630 if (e->need_process_child) {
3631 r = process_child(e);
3632 if (r < 0)
3633 goto finish;
3634 }
3635
3636 r = process_inotify(e);
3637 if (r < 0)
3638 goto finish;
3639
3640 if (event_next_pending(e)) {
3641 e->state = SD_EVENT_PENDING;
3642
3643 return 1;
3644 }
3645
3646 r = 0;
3647
3648 finish:
3649 e->state = SD_EVENT_INITIAL;
3650
3651 return r;
3652 }
3653
3654 _public_ int sd_event_dispatch(sd_event *e) {
3655 sd_event_source *p;
3656 int r;
3657
3658 assert_return(e, -EINVAL);
3659 assert_return(e = event_resolve(e), -ENOPKG);
3660 assert_return(!event_pid_changed(e), -ECHILD);
3661 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3662 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3663
3664 if (e->exit_requested)
3665 return dispatch_exit(e);
3666
3667 p = event_next_pending(e);
3668 if (p) {
3669 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3670
3671 ref = sd_event_ref(e);
3672 e->state = SD_EVENT_RUNNING;
3673 r = source_dispatch(p);
3674 e->state = SD_EVENT_INITIAL;
3675 return r;
3676 }
3677
3678 e->state = SD_EVENT_INITIAL;
3679
3680 return 1;
3681 }
3682
3683 static void event_log_delays(sd_event *e) {
3684 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3685 size_t l, i;
3686
3687 p = b;
3688 l = sizeof(b);
3689 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3690 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3691 e->delays[i] = 0;
3692 }
3693 log_debug("Event loop iterations: %s", b);
3694 }
3695
3696 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3697 int r;
3698
3699 assert_return(e, -EINVAL);
3700 assert_return(e = event_resolve(e), -ENOPKG);
3701 assert_return(!event_pid_changed(e), -ECHILD);
3702 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3703 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3704
3705 if (e->profile_delays && e->last_run) {
3706 usec_t this_run;
3707 unsigned l;
3708
3709 this_run = now(CLOCK_MONOTONIC);
3710
3711 l = u64log2(this_run - e->last_run);
3712 assert(l < sizeof(e->delays));
3713 e->delays[l]++;
3714
3715 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3716 event_log_delays(e);
3717 e->last_log = this_run;
3718 }
3719 }
3720
3721 r = sd_event_prepare(e);
3722 if (r == 0)
3723 /* There was nothing? Then wait... */
3724 r = sd_event_wait(e, timeout);
3725
3726 if (e->profile_delays)
3727 e->last_run = now(CLOCK_MONOTONIC);
3728
3729 if (r > 0) {
3730 /* There's something now, then let's dispatch it */
3731 r = sd_event_dispatch(e);
3732 if (r < 0)
3733 return r;
3734
3735 return 1;
3736 }
3737
3738 return r;
3739 }
3740
3741 _public_ int sd_event_loop(sd_event *e) {
3742 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3743 int r;
3744
3745 assert_return(e, -EINVAL);
3746 assert_return(e = event_resolve(e), -ENOPKG);
3747 assert_return(!event_pid_changed(e), -ECHILD);
3748 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3749
3750 ref = sd_event_ref(e);
3751
3752 while (e->state != SD_EVENT_FINISHED) {
3753 r = sd_event_run(e, (uint64_t) -1);
3754 if (r < 0)
3755 return r;
3756 }
3757
3758 return e->exit_code;
3759 }
3760
3761 _public_ int sd_event_get_fd(sd_event *e) {
3762
3763 assert_return(e, -EINVAL);
3764 assert_return(e = event_resolve(e), -ENOPKG);
3765 assert_return(!event_pid_changed(e), -ECHILD);
3766
3767 return e->epoll_fd;
3768 }
3769
3770 _public_ int sd_event_get_state(sd_event *e) {
3771 assert_return(e, -EINVAL);
3772 assert_return(e = event_resolve(e), -ENOPKG);
3773 assert_return(!event_pid_changed(e), -ECHILD);
3774
3775 return e->state;
3776 }
3777
3778 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3779 assert_return(e, -EINVAL);
3780 assert_return(e = event_resolve(e), -ENOPKG);
3781 assert_return(code, -EINVAL);
3782 assert_return(!event_pid_changed(e), -ECHILD);
3783
3784 if (!e->exit_requested)
3785 return -ENODATA;
3786
3787 *code = e->exit_code;
3788 return 0;
3789 }
3790
3791 _public_ int sd_event_exit(sd_event *e, int code) {
3792 assert_return(e, -EINVAL);
3793 assert_return(e = event_resolve(e), -ENOPKG);
3794 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3795 assert_return(!event_pid_changed(e), -ECHILD);
3796
3797 e->exit_requested = true;
3798 e->exit_code = code;
3799
3800 return 0;
3801 }
3802
3803 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3804 assert_return(e, -EINVAL);
3805 assert_return(e = event_resolve(e), -ENOPKG);
3806 assert_return(usec, -EINVAL);
3807 assert_return(!event_pid_changed(e), -ECHILD);
3808
3809 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3810 return -EOPNOTSUPP;
3811
3812 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that don't use clock_supported() here,
3813 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3814 * the purpose of getting the time this doesn't matter. */
3815 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3816 return -EOPNOTSUPP;
3817
3818 if (!triple_timestamp_is_set(&e->timestamp)) {
3819 /* Implicitly fall back to now() if we never ran
3820 * before and thus have no cached time. */
3821 *usec = now(clock);
3822 return 1;
3823 }
3824
3825 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3826 return 0;
3827 }
3828
3829 _public_ int sd_event_default(sd_event **ret) {
3830 sd_event *e = NULL;
3831 int r;
3832
3833 if (!ret)
3834 return !!default_event;
3835
3836 if (default_event) {
3837 *ret = sd_event_ref(default_event);
3838 return 0;
3839 }
3840
3841 r = sd_event_new(&e);
3842 if (r < 0)
3843 return r;
3844
3845 e->default_event_ptr = &default_event;
3846 e->tid = gettid();
3847 default_event = e;
3848
3849 *ret = e;
3850 return 1;
3851 }
3852
3853 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3854 assert_return(e, -EINVAL);
3855 assert_return(e = event_resolve(e), -ENOPKG);
3856 assert_return(tid, -EINVAL);
3857 assert_return(!event_pid_changed(e), -ECHILD);
3858
3859 if (e->tid != 0) {
3860 *tid = e->tid;
3861 return 0;
3862 }
3863
3864 return -ENXIO;
3865 }
3866
3867 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3868 int r;
3869
3870 assert_return(e, -EINVAL);
3871 assert_return(e = event_resolve(e), -ENOPKG);
3872 assert_return(!event_pid_changed(e), -ECHILD);
3873
3874 if (e->watchdog == !!b)
3875 return e->watchdog;
3876
3877 if (b) {
3878 r = sd_watchdog_enabled(false, &e->watchdog_period);
3879 if (r <= 0)
3880 return r;
3881
3882 /* Issue first ping immediately */
3883 sd_notify(false, "WATCHDOG=1");
3884 e->watchdog_last = now(CLOCK_MONOTONIC);
3885
3886 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3887 if (e->watchdog_fd < 0)
3888 return -errno;
3889
3890 r = arm_watchdog(e);
3891 if (r < 0)
3892 goto fail;
3893
3894 struct epoll_event ev = {
3895 .events = EPOLLIN,
3896 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3897 };
3898
3899 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3900 if (r < 0) {
3901 r = -errno;
3902 goto fail;
3903 }
3904
3905 } else {
3906 if (e->watchdog_fd >= 0) {
3907 (void) epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3908 e->watchdog_fd = safe_close(e->watchdog_fd);
3909 }
3910 }
3911
3912 e->watchdog = !!b;
3913 return e->watchdog;
3914
3915 fail:
3916 e->watchdog_fd = safe_close(e->watchdog_fd);
3917 return r;
3918 }
3919
3920 _public_ int sd_event_get_watchdog(sd_event *e) {
3921 assert_return(e, -EINVAL);
3922 assert_return(e = event_resolve(e), -ENOPKG);
3923 assert_return(!event_pid_changed(e), -ECHILD);
3924
3925 return e->watchdog;
3926 }
3927
3928 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3929 assert_return(e, -EINVAL);
3930 assert_return(e = event_resolve(e), -ENOPKG);
3931 assert_return(!event_pid_changed(e), -ECHILD);
3932
3933 *ret = e->iteration;
3934 return 0;
3935 }
3936
3937 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3938 assert_return(s, -EINVAL);
3939
3940 s->destroy_callback = callback;
3941 return 0;
3942 }
3943
3944 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3945 assert_return(s, -EINVAL);
3946
3947 if (ret)
3948 *ret = s->destroy_callback;
3949
3950 return !!s->destroy_callback;
3951 }
3952
3953 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3954 assert_return(s, -EINVAL);
3955
3956 return s->floating;
3957 }
3958
3959 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3960 assert_return(s, -EINVAL);
3961
3962 if (s->floating == !!b)
3963 return 0;
3964
3965 if (!s->event) /* Already disconnected */
3966 return -ESTALE;
3967
3968 s->floating = b;
3969
3970 if (b) {
3971 sd_event_source_ref(s);
3972 sd_event_unref(s->event);
3973 } else {
3974 sd_event_ref(s->event);
3975 sd_event_source_unref(s);
3976 }
3977
3978 return 1;
3979 }