/* SPDX-License-Identifier: LGPL-2.1+ */
/***
  This file is part of systemd.

  Copyright 2013 Lennart Poettering

  systemd is free software; you can redistribute it and/or modify it
  under the terms of the GNU Lesser General Public License as published by
  the Free Software Foundation; either version 2.1 of the License, or
  (at your option) any later version.

  systemd is distributed in the hope that it will be useful, but
  WITHOUT ANY WARRANTY; without even the implied warranty of
  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  Lesser General Public License for more details.

  You should have received a copy of the GNU Lesser General Public License
  along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "fd-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "missing.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"
#include "util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;
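
/* Illustrative sketch (not part of the original file): because every object
 * registered with epoll begins with a WakeupType field, a dispatcher can
 * branch on ev.data.ptr without knowing the concrete type up front:
 *
 *     WakeupType *t = ev.data.ptr;
 *     switch (*t) {
 *     case WAKEUP_EVENT_SOURCE: ... dispatch the sd_event_source ...; break;
 *     case WAKEUP_CLOCK_DATA:   ... flush the timerfd ...;            break;
 *     case WAKEUP_SIGNAL_DATA:  ... drain the signalfd ...;           break;
 *     default:                  assert_not_reached("invalid wakeup type");
 *     }
 */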

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;
        bool floating:1;

        int64_t priority;
        unsigned pending_index;
        unsigned prepare_index;
        uint64_t pending_iteration;
        uint64_t prepare_iteration;

        LIST_FIELDS(sd_event_source, sources);

        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};

struct clock_data {
        WakeupType wakeup;
        int fd;

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;

        bool needs_rearm:1;
};
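
/* Worked example (illustrative): given two enabled timers
 *     A: next = 1000us, accuracy = 250us  (latest = 1250us)
 *     B: next = 1100us, accuracy = 500us  (latest = 1600us)
 * the top of 'earliest' is A (1000us) and the top of 'latest' is also A
 * (1250us), so a single wakeup anywhere in [1000us, 1250us] meets every
 * deadline; sources not yet due at that point simply wait for the next
 * rearm. */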

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;
        sigset_t sigset;
        sd_event_source *current;
};
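
/* Sketch (illustrative): signal_data objects are keyed by priority in
 * e->signal_data, so finding the signalfd that carries a given source's
 * signal is just a hashmap lookup:
 *
 *     struct signal_data *d = hashmap_get(e->signal_data, &s->priority);
 *     // d->fd is the signalfd shared by all signal sources at this priority
 */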

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};

static void source_disconnect(sd_event_source *s);

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        /* Older entries first */
        if (x->pending_iteration < y->pending_iteration)
                return -1;
        if (x->pending_iteration > y->pending_iteration)
                return 1;

        return 0;
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        if (x->prepare_iteration < y->prepare_iteration)
                return -1;
        if (x->prepare_iteration > y->prepare_iteration)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (x->time.next < y->time.next)
                return -1;
        if (x->time.next > y->time.next)
                return 1;

        return 0;
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        if (time_event_source_latest(x) < time_event_source_latest(y))
                return -1;
        if (time_event_source_latest(x) > time_event_source_latest(y))
                return 1;

        return 0;
}

static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        if (x->priority < y->priority)
                return -1;
        if (x->priority > y->priority)
                return 1;

        return 0;
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid_cached();
        e->perturb = USEC_INFINITY;

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}

_public_ sd_event* sd_event_ref(sd_event *e) {

        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}

_public_ sd_event* sd_event_unref(sd_event *e) {

        if (!e)
                return NULL;

        assert(e->n_ref >= 1);
        e->n_ref--;

        if (e->n_ref <= 0)
                event_free(e);

        return NULL;
}

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev = {};
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev.events = events;
        ev.data.ptr = s;

        if (enabled == SD_EVENT_ONESHOT)
                ev.events |= EPOLLONESHOT;

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev = {};
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = 0;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new0(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                d->wakeup = WAKEUP_SIGNAL_DATA;
                d->fd = -1;
                d->priority = priority;

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = r;

        ev.events = EPOLLIN;
        ev.data.ptr = d;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added) {
                d->fd = safe_close(d->fd);
                hashmap_remove(e->signal_data, &d->priority);
                free(d);
        }

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, the object is removed entirely. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new0(sd_event_source, 1);
        if (!s)
                return NULL;

        s->n_ref = 1;
        s->event = e;
        s->floating = floating;
        s->type = type;
        s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
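
/* Usage sketch (illustrative, public API only; the handler name and fd are
 * hypothetical). Returning a negative value from the callback disables the
 * source, as source_dispatch() below shows:
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     sd_event *e = NULL;
 *     sd_event_source *src = NULL;
 *     sd_event_default(&e);
 *     sd_event_add_io(e, &src, some_fd, EPOLLIN, on_readable, NULL);
 *     sd_event_loop(e);
 */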

static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}
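
/* Worked example (illustrative): if the boot ID hashes to a perturb of
 * 17.3s, minute-aligned wakeups on this machine land at hh:mm:17.3 rather
 * than hh:mm:00.0. All timers on this host coalesce onto that offset, while
 * a host with a different boot ID picks a different one, so a fleet with
 * synced clocks does not wake up in lockstep. */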

static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev = {};
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        ev.events = EPOLLIN;
        ev.data.ptr = d;

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
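
/* Usage sketch (illustrative): fire once roughly a second from now with
 * 100ms of coalescing slack; 'on_time' is a hypothetical handler:
 *
 *     uint64_t now_usec;
 *     sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     sd_event_add_time(e, &src, CLOCK_MONOTONIC,
 *                       now_usec + USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                       on_time, NULL);
 *
 * An accuracy of 0 selects DEFAULT_ACCURACY_USEC (250ms); a NULL callback
 * turns the source into "exit the loop at this time" via
 * time_exit_callback(). */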

static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;

        return 0;
}
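
/* Usage sketch (illustrative): the signal must already be blocked in the
 * calling thread, otherwise the -EBUSY check above fires:
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *     sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);  // NULL callback: exit the loop
 */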

_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
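
/* Usage sketch (illustrative): watch a previously forked child for
 * termination; 'on_child' is a hypothetical handler. As with signal sources,
 * SIGCHLD must be blocked so that the underlying signalfd can observe it:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("child exited with status %i", si->si_status);
 *             return 0;
 *     }
 *
 *     sd_event_add_child(e, &src, pid, WEXITED, on_child, NULL);
 */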

_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}
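
/* Usage sketch (illustrative): defer sources are created pending and
 * SD_EVENT_ONESHOT, so 'on_idle' (hypothetical) runs exactly once, on the
 * next loop iteration, unless the callback re-enables the source:
 *
 *     sd_event_add_defer(e, &src, on_idle, NULL);
 */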

_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0) {
                source_free(s);
                return r;
        }

        if (ret)
                *ret = s;

        return 0;
}

_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}

_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
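
/* Illustrative pattern relying on the hack above: a handler may drop its own
 * last reference and close its fd right away; the source object stays valid
 * (though detached from epoll) until the dispatch returns:
 *
 *     static int on_once(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             sd_event_source_unref(s);   // safe: we are inside the dispatch
 *             close(fd);                  // safe: the fd was detached from epoll above
 *             return 0;
 *     }
 */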

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        source_set_pending(s, false);

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        s->priority = old->priority;
                        return r;
                }

                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}

_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:

                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:

                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms steps. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}
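
/* Worked example (illustrative), in seconds for readability, perturb = 17s:
 * with a = 90s, b = 200s the minute step yields c = (200/60)*60 + 17 = 197s,
 * which lies in [90, 200) and is returned. With a tighter window a = 90s,
 * b = 95s: the minute step gives 77s (< a, rejected), the 10s step gives
 * 97s, adjusted down to 87s (< a, rejected), and the 1s step gives 95s,
 * adjusted down to 94s, which fits and is returned. */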

static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}

static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
           So, this is ugly. We iteratively invoke waitid() with P_PID
           + WNOHANG for each PID we wait for, instead of using
           P_ALL. This is because we only want to get child
           information of very specific child processes, and not all
           of them. We might not have processed the SIGCHLD event of a
           previous invocation and we don't want to maintain an
           unbounded *per-child* event queue, hence we really don't
           want anything flushed out of the kernel's queue that we
           don't care about. Since this is O(n) this means that if you
           have a lot of processes you probably want to handle SIGCHLD
           yourself.

           We do not reap the children here (by using WNOWAIT); this
           is only done after the event source is dispatched so that
           the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}

static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, a
           SIGCHLD enqueued behind it would go unnoticed; but we
           might have higher-priority children we care about, hence
           we need to check for them explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
2224
2225 static int source_dispatch(sd_event_source *s) {
2226 EventSourceType saved_type;
2227 int r = 0;
2228
2229 assert(s);
2230 assert(s->pending || s->type == SOURCE_EXIT);
2231
2232 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
2233 * the event. */
2234 saved_type = s->type;
2235
2236 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2237 r = source_set_pending(s, false);
2238 if (r < 0)
2239 return r;
2240 }
2241
2242 if (s->type != SOURCE_POST) {
2243 sd_event_source *z;
2244 Iterator i;
2245
2246 /* If we execute a non-post source, let's mark all
2247 * post sources as pending */
2248
2249 SET_FOREACH(z, s->event->post_sources, i) {
2250 if (z->enabled == SD_EVENT_OFF)
2251 continue;
2252
2253 r = source_set_pending(z, true);
2254 if (r < 0)
2255 return r;
2256 }
2257 }
2258
2259 if (s->enabled == SD_EVENT_ONESHOT) {
2260 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2261 if (r < 0)
2262 return r;
2263 }
2264
2265 s->dispatching = true;
2266
2267 switch (s->type) {
2268
2269 case SOURCE_IO:
2270 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2271 break;
2272
2273 case SOURCE_TIME_REALTIME:
2274 case SOURCE_TIME_BOOTTIME:
2275 case SOURCE_TIME_MONOTONIC:
2276 case SOURCE_TIME_REALTIME_ALARM:
2277 case SOURCE_TIME_BOOTTIME_ALARM:
2278 r = s->time.callback(s, s->time.next, s->userdata);
2279 break;
2280
2281 case SOURCE_SIGNAL:
2282 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2283 break;
2284
2285 case SOURCE_CHILD: {
2286 bool zombie;
2287
2288 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2289
2290 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2291
2292 /* Now, reap the PID for good. */
2293 if (zombie)
2294 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2295
2296 break;
2297 }
2298
2299 case SOURCE_DEFER:
2300 r = s->defer.callback(s, s->userdata);
2301 break;
2302
2303 case SOURCE_POST:
2304 r = s->post.callback(s, s->userdata);
2305 break;
2306
2307 case SOURCE_EXIT:
2308 r = s->exit.callback(s, s->userdata);
2309 break;
2310
2311 case SOURCE_WATCHDOG:
2312 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2313 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2314 assert_not_reached("Wut? I shouldn't exist.");
2315 }
2316
2317 s->dispatching = false;
2318
2319 if (r < 0)
2320 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2321 strna(s->description), event_source_type_to_string(saved_type));
2322
2323 if (s->n_ref == 0)
2324 source_free(s);
2325 else if (r < 0)
2326 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2327
2328 return 1;
2329 }
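
/* Illustrative sketch: given the SD_EVENT_ONESHOT handling above, a one-shot source is
 * switched off *before* its handler runs, so the handler may re-enable it for exactly
 * one further dispatch. Defer sources created with sd_event_add_defer() start out in
 * ONESHOT mode and hence fire once unless re-armed:
 *
 *     static int once(sd_event_source *s, void *userdata) {
 *             log_info("dispatched once");
 *             return 0;
 *     }
 *
 *     (void) sd_event_add_defer(e, NULL, once, NULL);
 */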
2330
2331 static int event_prepare(sd_event *e) {
2332 int r;
2333
2334 assert(e);
2335
2336 for (;;) {
2337 sd_event_source *s;
2338
2339 s = prioq_peek(e->prepare);
2340 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2341 break;
2342
2343 s->prepare_iteration = e->iteration;
2344 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2345 if (r < 0)
2346 return r;
2347
2348 assert(s->prepare);
2349
2350 s->dispatching = true;
2351 r = s->prepare(s, s->userdata);
2352 s->dispatching = false;
2353
2354 if (r < 0)
2355 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2356 strna(s->description), event_source_type_to_string(s->type));
2357
2358 if (s->n_ref == 0)
2359 source_free(s);
2360 else if (r < 0)
2361 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2362 }
2363
2364 return 0;
2365 }
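
/* Illustrative sketch: prepare callbacks are registered with sd_event_source_set_prepare()
 * and run here once per iteration, right before the loop polls, which makes them a good
 * place to refresh a source just in time. have_data_to_write() is a hypothetical helper:
 *
 *     static int prepare_io(sd_event_source *s, void *userdata) {
 *             return sd_event_source_set_io_events(s, have_data_to_write() ? EPOLLIN|EPOLLOUT : EPOLLIN);
 *     }
 *
 *     (void) sd_event_source_set_prepare(io_source, prepare_io);
 */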
2366
2367 static int dispatch_exit(sd_event *e) {
2368 sd_event_source *p;
2369 int r;
2370
2371 assert(e);
2372
2373 p = prioq_peek(e->exit);
2374 if (!p || p->enabled == SD_EVENT_OFF) {
2375 e->state = SD_EVENT_FINISHED;
2376 return 0;
2377 }
2378
2379 sd_event_ref(e);
2380 e->iteration++;
2381 e->state = SD_EVENT_EXITING;
2382
2383 r = source_dispatch(p);
2384
2385 e->state = SD_EVENT_INITIAL;
2386 sd_event_unref(e);
2387
2388 return r;
2389 }
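
/* Illustrative sketch: exit sources are registered with sd_event_add_exit() and are
 * dispatched above one per loop iteration, highest priority first, until none is left
 * and the loop enters SD_EVENT_FINISHED. That makes them a natural place for cleanup:
 *
 *     static int on_loop_exit(sd_event_source *s, void *userdata) {
 *             log_info("shutting down");
 *             return 0;
 *     }
 *
 *     (void) sd_event_add_exit(e, NULL, on_loop_exit, NULL);
 */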
2390
2391 static sd_event_source* event_next_pending(sd_event *e) {
2392 sd_event_source *p;
2393
2394 assert(e);
2395
2396 p = prioq_peek(e->pending);
2397 if (!p)
2398 return NULL;
2399
2400 if (p->enabled == SD_EVENT_OFF)
2401 return NULL;
2402
2403 return p;
2404 }
2405
2406 static int arm_watchdog(sd_event *e) {
2407 struct itimerspec its = {};
2408 usec_t t;
2409 int r;
2410
2411 assert(e);
2412 assert(e->watchdog_fd >= 0);
2413
2414 t = sleep_between(e,
2415 e->watchdog_last + (e->watchdog_period / 2),
2416 e->watchdog_last + (e->watchdog_period * 3 / 4));
2417
2418 timespec_store(&its.it_value, t);
2419
2420 /* Make sure we never set the watchdog to 0, which tells the
2421 * kernel to disable it. */
2422 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2423 its.it_value.tv_nsec = 1;
2424
2425 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2426 if (r < 0)
2427 return -errno;
2428
2429 return 0;
2430 }
2431
2432 static int process_watchdog(sd_event *e) {
2433 assert(e);
2434
2435 if (!e->watchdog)
2436 return 0;
2437
2438 /* Don't notify watchdog too often */
2439 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2440 return 0;
2441
2442 sd_notify(false, "WATCHDOG=1");
2443 e->watchdog_last = e->timestamp.monotonic;
2444
2445 return arm_watchdog(e);
2446 }
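
/* Worked example of the scheme above, assuming the service manager passed
 * WATCHDOG_USEC=30s (i.e. watchdog_period == 30s): arm_watchdog() schedules the next
 * ping somewhere between watchdog_last + 15s and watchdog_last + 22.5s, folding it
 * into other pending wakeups via sleep_between(), and process_watchdog() additionally
 * refuses to ping more often than every 7.5s (period / 4). Either way we stay well
 * within the 30s deadline while keeping wakeups batched. */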
2447
2448 _public_ int sd_event_prepare(sd_event *e) {
2449 int r;
2450
2451 assert_return(e, -EINVAL);
2452 assert_return(!event_pid_changed(e), -ECHILD);
2453 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2454 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2455
2456 if (e->exit_requested)
2457 goto pending;
2458
2459 e->iteration++;
2460
2461 e->state = SD_EVENT_PREPARING;
2462 r = event_prepare(e);
2463 e->state = SD_EVENT_INITIAL;
2464 if (r < 0)
2465 return r;
2466
2467 r = event_arm_timer(e, &e->realtime);
2468 if (r < 0)
2469 return r;
2470
2471 r = event_arm_timer(e, &e->boottime);
2472 if (r < 0)
2473 return r;
2474
2475 r = event_arm_timer(e, &e->monotonic);
2476 if (r < 0)
2477 return r;
2478
2479 r = event_arm_timer(e, &e->realtime_alarm);
2480 if (r < 0)
2481 return r;
2482
2483 r = event_arm_timer(e, &e->boottime_alarm);
2484 if (r < 0)
2485 return r;
2486
2487 if (event_next_pending(e) || e->need_process_child)
2488 goto pending;
2489
2490 e->state = SD_EVENT_ARMED;
2491
2492 return 0;
2493
2494 pending:
2495 e->state = SD_EVENT_ARMED;
2496 r = sd_event_wait(e, 0);
2497 if (r == 0)
2498 e->state = SD_EVENT_ARMED;
2499
2500 return r;
2501 }
2502
2503 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2504 struct epoll_event *ev_queue;
2505 unsigned ev_queue_max;
2506 int r, m, i;
2507
2508 assert_return(e, -EINVAL);
2509 assert_return(!event_pid_changed(e), -ECHILD);
2510 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2511 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2512
2513 if (e->exit_requested) {
2514 e->state = SD_EVENT_PENDING;
2515 return 1;
2516 }
2517
2518 ev_queue_max = MAX(e->n_sources, 1u);
2519 ev_queue = newa(struct epoll_event, ev_queue_max);
2520
2521 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2522 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2523 if (m < 0) {
2524 if (errno == EINTR) {
2525 e->state = SD_EVENT_PENDING;
2526 return 1;
2527 }
2528
2529 r = -errno;
2530 goto finish;
2531 }
2532
2533 triple_timestamp_get(&e->timestamp);
2534
2535 for (i = 0; i < m; i++) {
2536
2537 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2538 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2539 else {
2540 WakeupType *t = ev_queue[i].data.ptr;
2541
2542 switch (*t) {
2543
2544 case WAKEUP_EVENT_SOURCE:
2545 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2546 break;
2547
2548 case WAKEUP_CLOCK_DATA: {
2549 struct clock_data *d = ev_queue[i].data.ptr;
2550 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2551 break;
2552 }
2553
2554 case WAKEUP_SIGNAL_DATA:
2555 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2556 break;
2557
2558 default:
2559 assert_not_reached("Invalid wake-up pointer");
2560 }
2561 }
2562 if (r < 0)
2563 goto finish;
2564 }
2565
2566 r = process_watchdog(e);
2567 if (r < 0)
2568 goto finish;
2569
2570 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2571 if (r < 0)
2572 goto finish;
2573
2574 r = process_timer(e, e->timestamp.boottime, &e->boottime);
2575 if (r < 0)
2576 goto finish;
2577
2578 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2579 if (r < 0)
2580 goto finish;
2581
2582 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2583 if (r < 0)
2584 goto finish;
2585
2586 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2587 if (r < 0)
2588 goto finish;
2589
2590 if (e->need_process_child) {
2591 r = process_child(e);
2592 if (r < 0)
2593 goto finish;
2594 }
2595
2596 if (event_next_pending(e)) {
2597 e->state = SD_EVENT_PENDING;
2598
2599 return 1;
2600 }
2601
2602 r = 0;
2603
2604 finish:
2605 e->state = SD_EVENT_INITIAL;
2606
2607 return r;
2608 }
2609
2610 _public_ int sd_event_dispatch(sd_event *e) {
2611 sd_event_source *p;
2612 int r;
2613
2614 assert_return(e, -EINVAL);
2615 assert_return(!event_pid_changed(e), -ECHILD);
2616 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2617 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2618
2619 if (e->exit_requested)
2620 return dispatch_exit(e);
2621
2622 p = event_next_pending(e);
2623 if (p) {
2624 sd_event_ref(e);
2625
2626 e->state = SD_EVENT_RUNNING;
2627 r = source_dispatch(p);
2628 e->state = SD_EVENT_INITIAL;
2629
2630 sd_event_unref(e);
2631
2632 return r;
2633 }
2634
2635 e->state = SD_EVENT_INITIAL;
2636
2637 return 1;
2638 }
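
/* Illustrative sketch: sd_event_prepare(), sd_event_wait() and sd_event_dispatch()
 * decompose a single loop iteration; sd_event_run() below is essentially this sequence:
 *
 *     r = sd_event_prepare(e);                 (INITIAL -> ARMED, or PENDING if r > 0)
 *     if (r == 0)
 *             r = sd_event_wait(e, timeout);   (ARMED -> PENDING if r > 0, else back to INITIAL)
 *     if (r > 0)
 *             r = sd_event_dispatch(e);        (PENDING -> RUNNING -> INITIAL)
 */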
2639
2640 static void event_log_delays(sd_event *e) {
2641 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2642 unsigned i;
2643 int o;
2644
2645 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2646 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2647 e->delays[i] = 0;
2648 }
2649 log_debug("Event loop iterations: %.*s", o, b);
2650 }
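
/* The delays[] histogram logged above uses power-of-two buckets: in sd_event_run()
 * below, u64log2() of the microseconds elapsed between two consecutive runs picks the
 * slot, so slot n counts iterations that were roughly 2^n..2^(n+1) usec apart. The
 * profiling is opt-in: e->profile_delays is only set when the loop was created with
 * the SD_EVENT_PROFILE_DELAYS environment variable present. */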
2651
2652 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2653 int r;
2654
2655 assert_return(e, -EINVAL);
2656 assert_return(!event_pid_changed(e), -ECHILD);
2657 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2658 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2659
2660 if (e->profile_delays && e->last_run) {
2661 usec_t this_run;
2662 unsigned l;
2663
2664 this_run = now(CLOCK_MONOTONIC);
2665
2666 l = u64log2(this_run - e->last_run);
2667 assert(l < sizeof(e->delays));
2668 e->delays[l]++;
2669
2670 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2671 event_log_delays(e);
2672 e->last_log = this_run;
2673 }
2674 }
2675
2676 r = sd_event_prepare(e);
2677 if (r == 0)
2678 /* There was nothing? Then wait... */
2679 r = sd_event_wait(e, timeout);
2680
2681 if (e->profile_delays)
2682 e->last_run = now(CLOCK_MONOTONIC);
2683
2684 if (r > 0) {
2685         /* There's something now, so let's dispatch it */
2686 r = sd_event_dispatch(e);
2687 if (r < 0)
2688 return r;
2689
2690 return 1;
2691 }
2692
2693 return r;
2694 }
2695
2696 _public_ int sd_event_loop(sd_event *e) {
2697 int r;
2698
2699 assert_return(e, -EINVAL);
2700 assert_return(!event_pid_changed(e), -ECHILD);
2701 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2702
2703 sd_event_ref(e);
2704
2705 while (e->state != SD_EVENT_FINISHED) {
2706 r = sd_event_run(e, (uint64_t) -1);
2707 if (r < 0)
2708 goto finish;
2709 }
2710
2711 r = e->exit_code;
2712
2713 finish:
2714 sd_event_unref(e);
2715 return r;
2716 }
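
/* Minimal usage sketch (illustrative only):
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_default(&e);
 *     if (r < 0)
 *             return r;
 *
 *     ... add IO/timer/signal sources here ...
 *
 *     r = sd_event_loop(e);    runs until sd_event_exit(), then returns the exit code
 *     sd_event_unref(e);
 *     return r;
 */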
2717
2718 _public_ int sd_event_get_fd(sd_event *e) {
2719
2720 assert_return(e, -EINVAL);
2721 assert_return(!event_pid_changed(e), -ECHILD);
2722
2723 return e->epoll_fd;
2724 }
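
/* Illustrative sketch: the returned epoll fd allows embedding this loop into a foreign
 * event loop. Wait for the fd to become readable, then run one non-blocking iteration:
 *
 *     struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };
 *     if (poll(&p, 1, -1) > 0)
 *             (void) sd_event_run(e, 0);    timeout 0: dispatch what's ready, don't block
 */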
2725
2726 _public_ int sd_event_get_state(sd_event *e) {
2727 assert_return(e, -EINVAL);
2728 assert_return(!event_pid_changed(e), -ECHILD);
2729
2730 return e->state;
2731 }
2732
2733 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2734 assert_return(e, -EINVAL);
2735 assert_return(code, -EINVAL);
2736 assert_return(!event_pid_changed(e), -ECHILD);
2737
2738 if (!e->exit_requested)
2739 return -ENODATA;
2740
2741 *code = e->exit_code;
2742 return 0;
2743 }
2744
2745 _public_ int sd_event_exit(sd_event *e, int code) {
2746 assert_return(e, -EINVAL);
2747 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2748 assert_return(!event_pid_changed(e), -ECHILD);
2749
2750 e->exit_requested = true;
2751 e->exit_code = code;
2752
2753 return 0;
2754 }
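
/* Illustrative sketch: sd_event_exit() only records the request; on the next iteration
 * the loop dispatches the exit sources and sd_event_loop() returns the stored code:
 *
 *     (void) sd_event_exit(e, EXIT_SUCCESS);    e.g. from within some event handler
 *     ...
 *     r = sd_event_loop(e);                     returns EXIT_SUCCESS once unwound
 */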
2755
2756 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2757 assert_return(e, -EINVAL);
2758 assert_return(usec, -EINVAL);
2759 assert_return(!event_pid_changed(e), -ECHILD);
2760
2761 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2762 return -EOPNOTSUPP;
2763
2764         /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
2765          * here, for a good reason: there are systems where CLOCK_BOOTTIME is supported but CLOCK_BOOTTIME_ALARM is
2766          * not, and for the purpose of getting the time this doesn't matter. */
2767 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2768 return -EOPNOTSUPP;
2769
2770 if (!triple_timestamp_is_set(&e->timestamp)) {
2771 /* Implicitly fall back to now() if we never ran
2772 * before and thus have no cached time. */
2773 *usec = now(clock);
2774 return 1;
2775 }
2776
2777 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
2778 return 0;
2779 }
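
/* Illustrative sketch: the cached timestamp is what makes relative timers consistent
 * within one iteration. A timer firing 5s from "now" is typically set up like this
 * (my_time_handler being an assumed callback):
 *
 *     uint64_t usec;
 *     (void) sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     (void) sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                              usec + 5 * USEC_PER_SEC, 0, my_time_handler, NULL);
 */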
2780
2781 _public_ int sd_event_default(sd_event **ret) {
2782
2783 static thread_local sd_event *default_event = NULL;
2784 sd_event *e = NULL;
2785 int r;
2786
2787 if (!ret)
2788 return !!default_event;
2789
2790 if (default_event) {
2791 *ret = sd_event_ref(default_event);
2792 return 0;
2793 }
2794
2795 r = sd_event_new(&e);
2796 if (r < 0)
2797 return r;
2798
2799 e->default_event_ptr = &default_event;
2800 e->tid = gettid();
2801 default_event = e;
2802
2803 *ret = e;
2804 return 1;
2805 }
2806
2807 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2808 assert_return(e, -EINVAL);
2809 assert_return(tid, -EINVAL);
2810 assert_return(!event_pid_changed(e), -ECHILD);
2811
2812 if (e->tid != 0) {
2813 *tid = e->tid;
2814 return 0;
2815 }
2816
2817 return -ENXIO;
2818 }
2819
2820 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2821 int r;
2822
2823 assert_return(e, -EINVAL);
2824 assert_return(!event_pid_changed(e), -ECHILD);
2825
2826 if (e->watchdog == !!b)
2827 return e->watchdog;
2828
2829 if (b) {
2830 struct epoll_event ev = {};
2831
2832 r = sd_watchdog_enabled(false, &e->watchdog_period);
2833 if (r <= 0)
2834 return r;
2835
2836 /* Issue first ping immediately */
2837 sd_notify(false, "WATCHDOG=1");
2838 e->watchdog_last = now(CLOCK_MONOTONIC);
2839
2840 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2841 if (e->watchdog_fd < 0)
2842 return -errno;
2843
2844 r = arm_watchdog(e);
2845 if (r < 0)
2846 goto fail;
2847
2848 ev.events = EPOLLIN;
2849 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2850
2851 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2852 if (r < 0) {
2853 r = -errno;
2854 goto fail;
2855 }
2856
2857 } else {
2858 if (e->watchdog_fd >= 0) {
2859 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2860 e->watchdog_fd = safe_close(e->watchdog_fd);
2861 }
2862 }
2863
2864 e->watchdog = !!b;
2865 return e->watchdog;
2866
2867 fail:
2868 e->watchdog_fd = safe_close(e->watchdog_fd);
2869 return r;
2870 }
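
/* Illustrative sketch: this only has an effect when the service manager supplied
 * WATCHDOG_USEC (e.g. a unit with Type=notify and WatchdogSec= set); otherwise
 * sd_watchdog_enabled() returns 0 above and the call is a no-op:
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;        r > 0 means enabled, r == 0 means none requested
 */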
2871
2872 _public_ int sd_event_get_watchdog(sd_event *e) {
2873 assert_return(e, -EINVAL);
2874 assert_return(!event_pid_changed(e), -ECHILD);
2875
2876 return e->watchdog;
2877 }
2878
2879 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2880         assert_return(e, -EINVAL);
        assert_return(ret, -EINVAL);
2881         assert_return(!event_pid_changed(e), -ECHILD);
2882
2883 *ret = e->iteration;
2884 return 0;
2885 }