1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36 #include "signal-util.h"
37
38 #include "sd-event.h"
39
40 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
41
42 typedef enum EventSourceType {
43 SOURCE_IO,
44 SOURCE_TIME_REALTIME,
45 SOURCE_TIME_BOOTTIME,
46 SOURCE_TIME_MONOTONIC,
47 SOURCE_TIME_REALTIME_ALARM,
48 SOURCE_TIME_BOOTTIME_ALARM,
49 SOURCE_SIGNAL,
50 SOURCE_CHILD,
51 SOURCE_DEFER,
52 SOURCE_POST,
53 SOURCE_EXIT,
54 SOURCE_WATCHDOG,
55 _SOURCE_EVENT_SOURCE_TYPE_MAX,
56 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
57 } EventSourceType;
58
59 /* All objects we use in epoll events start with this value, so that
60 * we know how to dispatch them */
61 typedef enum WakeupType {
62 WAKEUP_NONE,
63 WAKEUP_EVENT_SOURCE,
64 WAKEUP_CLOCK_DATA,
65 WAKEUP_SIGNAL_DATA,
66 _WAKEUP_TYPE_MAX,
67 _WAKEUP_TYPE_INVALID = -1,
68 } WakeupType;
69
70 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
71
72 struct sd_event_source {
73 WakeupType wakeup;
74
75 unsigned n_ref;
76
77 sd_event *event;
78 void *userdata;
79 sd_event_handler_t prepare;
80
81 char *description;
82
83 EventSourceType type:5;
84 int enabled:3;
85 bool pending:1;
86 bool dispatching:1;
87 bool floating:1;
88
89 int64_t priority;
90 unsigned pending_index;
91 unsigned prepare_index;
92 unsigned pending_iteration;
93 unsigned prepare_iteration;
94
95 LIST_FIELDS(sd_event_source, sources);
96
97 union {
98 struct {
99 sd_event_io_handler_t callback;
100 int fd;
101 uint32_t events;
102 uint32_t revents;
103 bool registered:1;
104 } io;
105 struct {
106 sd_event_time_handler_t callback;
107 usec_t next, accuracy;
108 unsigned earliest_index;
109 unsigned latest_index;
110 } time;
111 struct {
112 sd_event_signal_handler_t callback;
113 struct signalfd_siginfo siginfo;
114 int sig;
115 } signal;
116 struct {
117 sd_event_child_handler_t callback;
118 siginfo_t siginfo;
119 pid_t pid;
120 int options;
121 } child;
122 struct {
123 sd_event_handler_t callback;
124 } defer;
125 struct {
126 sd_event_handler_t callback;
127 } post;
128 struct {
129 sd_event_handler_t callback;
130 unsigned prioq_index;
131 } exit;
132 };
133 };
134
135 struct clock_data {
136 WakeupType wakeup;
137 int fd;
138
139 /* For each clock we maintain two priority queues: one
140 * ordered by the earliest time the events may be
141 * dispatched, and one ordered by the latest time they must
142 * have been dispatched. The range between the top entries of
143 * the two prioqs is the time window within which we can
144 * freely schedule wakeups. */
145
146 Prioq *earliest;
147 Prioq *latest;
148 usec_t next;
149
150 bool needs_rearm:1;
151 };
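/* Illustrative example (added here, not part of the original source): given
 * two armed sources A (next=100ms, accuracy=50ms) and B (next=120ms,
 * accuracy=10ms), prioq_peek(earliest) yields A, since it may fire from
 * 100ms on, and prioq_peek(latest) yields B, since its deadline of
 * 120ms+10ms=130ms is the strictest. Any single wakeup in [100ms, 130ms]
 * hence meets every deadline without waking before the first source is
 * even dispatchable. */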
152
153 struct signal_data {
154 WakeupType wakeup;
155
156 /* For each priority we maintain one signal fd, so that we
157 * only have to dequeue a single event per priority at a
158 * time. */
159
160 int fd;
161 int64_t priority;
162 sigset_t sigset;
163 sd_event_source *current;
164 };
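/* Illustrative example (added here, not part of the original source): a
 * SIGUSR1 source at priority -10 and a SIGUSR2 source at priority 0 end up
 * in two separate signal_data objects and hence two separate signalfds.
 * Since at most one event is dequeued per signalfd at a time, a burst of
 * low-priority signals can never delay the dispatch of a higher-priority
 * one. */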
165
166 struct sd_event {
167 unsigned n_ref;
168
169 int epoll_fd;
170 int watchdog_fd;
171
172 Prioq *pending;
173 Prioq *prepare;
174
175 /* timerfd_create() only supports these five clocks so far. We
176 * can add support for more clocks when the kernel learns to
177 * deal with them, too. */
178 struct clock_data realtime;
179 struct clock_data boottime;
180 struct clock_data monotonic;
181 struct clock_data realtime_alarm;
182 struct clock_data boottime_alarm;
183
184 usec_t perturb;
185
186 sd_event_source **signal_sources; /* indexed by signal number */
187 Hashmap *signal_data; /* indexed by priority */
188
189 Hashmap *child_sources;
190 unsigned n_enabled_child_sources;
191
192 Set *post_sources;
193
194 Prioq *exit;
195
196 pid_t original_pid;
197
198 unsigned iteration;
199 dual_timestamp timestamp;
200 usec_t timestamp_boottime;
201 int state;
202
203 bool exit_requested:1;
204 bool need_process_child:1;
205 bool watchdog:1;
206
207 int exit_code;
208
209 pid_t tid;
210 sd_event **default_event_ptr;
211
212 usec_t watchdog_last, watchdog_period;
213
214 unsigned n_sources;
215
216 LIST_HEAD(sd_event_source, sources);
217 };
218
219 static void source_disconnect(sd_event_source *s);
220
221 static int pending_prioq_compare(const void *a, const void *b) {
222 const sd_event_source *x = a, *y = b;
223
224 assert(x->pending);
225 assert(y->pending);
226
227 /* Enabled ones first */
228 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
229 return -1;
230 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
231 return 1;
232
233 /* Lower priority values first */
234 if (x->priority < y->priority)
235 return -1;
236 if (x->priority > y->priority)
237 return 1;
238
239 /* Older entries first */
240 if (x->pending_iteration < y->pending_iteration)
241 return -1;
242 if (x->pending_iteration > y->pending_iteration)
243 return 1;
244
245 /* Stability for the rest */
246 if (x < y)
247 return -1;
248 if (x > y)
249 return 1;
250
251 return 0;
252 }
253
254 static int prepare_prioq_compare(const void *a, const void *b) {
255 const sd_event_source *x = a, *y = b;
256
257 assert(x->prepare);
258 assert(y->prepare);
259
260 /* Move most recently prepared ones last, so that we can stop
261 * preparing as soon as we hit one that has already been
262 * prepared in the current iteration */
263 if (x->prepare_iteration < y->prepare_iteration)
264 return -1;
265 if (x->prepare_iteration > y->prepare_iteration)
266 return 1;
267
268 /* Enabled ones first */
269 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
270 return -1;
271 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
272 return 1;
273
274 /* Lower priority values first */
275 if (x->priority < y->priority)
276 return -1;
277 if (x->priority > y->priority)
278 return 1;
279
280 /* Stability for the rest */
281 if (x < y)
282 return -1;
283 if (x > y)
284 return 1;
285
286 return 0;
287 }
288
289 static int earliest_time_prioq_compare(const void *a, const void *b) {
290 const sd_event_source *x = a, *y = b;
291
292 assert(EVENT_SOURCE_IS_TIME(x->type));
293 assert(x->type == y->type);
294
295 /* Enabled ones first */
296 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
297 return -1;
298 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
299 return 1;
300
301 /* Move the pending ones to the end */
302 if (!x->pending && y->pending)
303 return -1;
304 if (x->pending && !y->pending)
305 return 1;
306
307 /* Order by time */
308 if (x->time.next < y->time.next)
309 return -1;
310 if (x->time.next > y->time.next)
311 return 1;
312
313 /* Stability for the rest */
314 if (x < y)
315 return -1;
316 if (x > y)
317 return 1;
318
319 return 0;
320 }
321
322 static int latest_time_prioq_compare(const void *a, const void *b) {
323 const sd_event_source *x = a, *y = b;
324
325 assert(EVENT_SOURCE_IS_TIME(x->type));
326 assert(x->type == y->type);
327
328 /* Enabled ones first */
329 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
330 return -1;
331 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
332 return 1;
333
334 /* Move the pending ones to the end */
335 if (!x->pending && y->pending)
336 return -1;
337 if (x->pending && !y->pending)
338 return 1;
339
340 /* Order by time */
341 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
342 return -1;
343 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
344 return 1;
345
346 /* Stability for the rest */
347 if (x < y)
348 return -1;
349 if (x > y)
350 return 1;
351
352 return 0;
353 }
354
355 static int exit_prioq_compare(const void *a, const void *b) {
356 const sd_event_source *x = a, *y = b;
357
358 assert(x->type == SOURCE_EXIT);
359 assert(y->type == SOURCE_EXIT);
360
361 /* Enabled ones first */
362 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
363 return -1;
364 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
365 return 1;
366
367 /* Lower priority values first */
368 if (x->priority < y->priority)
369 return -1;
370 if (x->priority > y->priority)
371 return 1;
372
373 /* Stability for the rest */
374 if (x < y)
375 return -1;
376 if (x > y)
377 return 1;
378
379 return 0;
380 }
381
382 static void free_clock_data(struct clock_data *d) {
383 assert(d);
384 assert(d->wakeup == WAKEUP_CLOCK_DATA);
385
386 safe_close(d->fd);
387 prioq_free(d->earliest);
388 prioq_free(d->latest);
389 }
390
391 static void event_free(sd_event *e) {
392 sd_event_source *s;
393
394 assert(e);
395
396 while ((s = e->sources)) {
397 assert(s->floating);
398 source_disconnect(s);
399 sd_event_source_unref(s);
400 }
401
402 assert(e->n_sources == 0);
403
404 if (e->default_event_ptr)
405 *(e->default_event_ptr) = NULL;
406
407 safe_close(e->epoll_fd);
408 safe_close(e->watchdog_fd);
409
410 free_clock_data(&e->realtime);
411 free_clock_data(&e->boottime);
412 free_clock_data(&e->monotonic);
413 free_clock_data(&e->realtime_alarm);
414 free_clock_data(&e->boottime_alarm);
415
416 prioq_free(e->pending);
417 prioq_free(e->prepare);
418 prioq_free(e->exit);
419
420 free(e->signal_sources);
421 hashmap_free(e->signal_data);
422
423 hashmap_free(e->child_sources);
424 set_free(e->post_sources);
425 free(e);
426 }
427
428 _public_ int sd_event_new(sd_event** ret) {
429 sd_event *e;
430 int r;
431
432 assert_return(ret, -EINVAL);
433
434 e = new0(sd_event, 1);
435 if (!e)
436 return -ENOMEM;
437
438 e->n_ref = 1;
439 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
440 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
441 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
442 e->original_pid = getpid();
443 e->perturb = USEC_INFINITY;
444
445 e->pending = prioq_new(pending_prioq_compare);
446 if (!e->pending) {
447 r = -ENOMEM;
448 goto fail;
449 }
450
451 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
452 if (e->epoll_fd < 0) {
453 r = -errno;
454 goto fail;
455 }
456
457 *ret = e;
458 return 0;
459
460 fail:
461 event_free(e);
462 return r;
463 }
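/* Usage sketch (added for illustration, not part of the original file):
 * create a loop, attach sources, run it, and drop the reference when done.
 * sd_event_loop() is defined further down in this file:
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *
 *     ... attach event sources here ...
 *
 *     r = sd_event_loop(e);
 *     sd_event_unref(e);
 *     return r;
 */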
464
465 _public_ sd_event* sd_event_ref(sd_event *e) {
466 assert_return(e, NULL);
467
468 assert(e->n_ref >= 1);
469 e->n_ref++;
470
471 return e;
472 }
473
474 _public_ sd_event* sd_event_unref(sd_event *e) {
475
476 if (!e)
477 return NULL;
478
479 assert(e->n_ref >= 1);
480 e->n_ref--;
481
482 if (e->n_ref <= 0)
483 event_free(e);
484
485 return NULL;
486 }
487
488 static bool event_pid_changed(sd_event *e) {
489 assert(e);
490
491 /* We don't support people creating an event loop and keeping
492 * it around over a fork(). Let's complain. */
493
494 return e->original_pid != getpid();
495 }
496
497 static void source_io_unregister(sd_event_source *s) {
498 int r;
499
500 assert(s);
501 assert(s->type == SOURCE_IO);
502
503 if (event_pid_changed(s->event))
504 return;
505
506 if (!s->io.registered)
507 return;
508
509 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
510 if (r < 0)
511 log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));
512
513 s->io.registered = false;
514 }
515
516 static int source_io_register(
517 sd_event_source *s,
518 int enabled,
519 uint32_t events) {
520
521 struct epoll_event ev = {};
522 int r;
523
524 assert(s);
525 assert(s->type == SOURCE_IO);
526 assert(enabled != SD_EVENT_OFF);
527
528 ev.events = events;
529 ev.data.ptr = s;
530
531 if (enabled == SD_EVENT_ONESHOT)
532 ev.events |= EPOLLONESHOT;
533
534 if (s->io.registered)
535 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
536 else
537 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
538 if (r < 0)
539 return -errno;
540
541 s->io.registered = true;
542
543 return 0;
544 }
545
546 static clockid_t event_source_type_to_clock(EventSourceType t) {
547
548 switch (t) {
549
550 case SOURCE_TIME_REALTIME:
551 return CLOCK_REALTIME;
552
553 case SOURCE_TIME_BOOTTIME:
554 return CLOCK_BOOTTIME;
555
556 case SOURCE_TIME_MONOTONIC:
557 return CLOCK_MONOTONIC;
558
559 case SOURCE_TIME_REALTIME_ALARM:
560 return CLOCK_REALTIME_ALARM;
561
562 case SOURCE_TIME_BOOTTIME_ALARM:
563 return CLOCK_BOOTTIME_ALARM;
564
565 default:
566 return (clockid_t) -1;
567 }
568 }
569
570 static EventSourceType clock_to_event_source_type(clockid_t clock) {
571
572 switch (clock) {
573
574 case CLOCK_REALTIME:
575 return SOURCE_TIME_REALTIME;
576
577 case CLOCK_BOOTTIME:
578 return SOURCE_TIME_BOOTTIME;
579
580 case CLOCK_MONOTONIC:
581 return SOURCE_TIME_MONOTONIC;
582
583 case CLOCK_REALTIME_ALARM:
584 return SOURCE_TIME_REALTIME_ALARM;
585
586 case CLOCK_BOOTTIME_ALARM:
587 return SOURCE_TIME_BOOTTIME_ALARM;
588
589 default:
590 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
591 }
592 }
593
594 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
595 assert(e);
596
597 switch (t) {
598
599 case SOURCE_TIME_REALTIME:
600 return &e->realtime;
601
602 case SOURCE_TIME_BOOTTIME:
603 return &e->boottime;
604
605 case SOURCE_TIME_MONOTONIC:
606 return &e->monotonic;
607
608 case SOURCE_TIME_REALTIME_ALARM:
609 return &e->realtime_alarm;
610
611 case SOURCE_TIME_BOOTTIME_ALARM:
612 return &e->boottime_alarm;
613
614 default:
615 return NULL;
616 }
617 }
618
619 static int event_make_signal_data(
620 sd_event *e,
621 int sig,
622 struct signal_data **ret) {
623
624 struct epoll_event ev = {};
625 struct signal_data *d;
626 bool added = false;
627 sigset_t ss_copy;
628 int64_t priority;
629 int r;
630
631 assert(e);
632
633 if (event_pid_changed(e))
634 return -ECHILD;
635
636 if (e->signal_sources && e->signal_sources[sig])
637 priority = e->signal_sources[sig]->priority;
638 else
639 priority = 0;
640
641 d = hashmap_get(e->signal_data, &priority);
642 if (d) {
643 if (sigismember(&d->sigset, sig) > 0) {
644 if (ret)
645 *ret = d;
646 return 0;
647 }
648 } else {
649 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
650 if (r < 0)
651 return r;
652
653 d = new0(struct signal_data, 1);
654 if (!d)
655 return -ENOMEM;
656
657 d->wakeup = WAKEUP_SIGNAL_DATA;
658 d->fd = -1;
659 d->priority = priority;
660
661 r = hashmap_put(e->signal_data, &d->priority, d);
662 if (r < 0)
663 return r;
664
665 added = true;
666 }
667
668 ss_copy = d->sigset;
669 assert_se(sigaddset(&ss_copy, sig) >= 0);
670
671 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
672 if (r < 0) {
673 r = -errno;
674 goto fail;
675 }
676
677 d->sigset = ss_copy;
678
679 if (d->fd >= 0) {
680 if (ret)
681 *ret = d;
682 return 0;
683 }
684
685 d->fd = r;
686
687 ev.events = EPOLLIN;
688 ev.data.ptr = d;
689
690 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
691 if (r < 0) {
692 r = -errno;
693 goto fail;
694 }
695
696 if (ret)
697 *ret = d;
698
699 return 0;
700
701 fail:
702 if (added) {
703 d->fd = safe_close(d->fd);
704 hashmap_remove(e->signal_data, &d->priority);
705 free(d);
706 }
707
708 return r;
709 }
710
711 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
712 assert(e);
713 assert(d);
714
715 /* Turns off the specified signal in the signal data
716 * object. If the signal mask of the object becomes empty
717 * that way, the object is removed as well. */
718
719 if (sigismember(&d->sigset, sig) == 0)
720 return;
721
722 assert_se(sigdelset(&d->sigset, sig) >= 0);
723
724 if (sigisemptyset(&d->sigset)) {
725
726 /* If the mask is now all-zero we can get rid of the structure */
727 hashmap_remove(e->signal_data, &d->priority);
728 assert(!d->current);
729 safe_close(d->fd);
730 free(d);
731 return;
732 }
733
734 assert(d->fd >= 0);
735
736 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
737 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
738 }
739
740 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
741 struct signal_data *d;
742 static const int64_t zero_priority = 0;
743
744 assert(e);
745
746 /* Rechecks if the specified signal is still something we are
747 * interested in. If not, we'll unmask it, and possibly drop
748 * the signalfd for it. */
749
750 if (sig == SIGCHLD &&
751 e->n_enabled_child_sources > 0)
752 return;
753
754 if (e->signal_sources &&
755 e->signal_sources[sig] &&
756 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
757 return;
758
759 /*
760 * The specified signal might be enabled in three different queues:
761 *
762 * 1) the one that belongs to the priority passed (if it is non-NULL)
763 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
764 * 3) the 0 priority (to cover the SIGCHLD case)
765 *
766 * Hence, let's remove it from all three here.
767 */
768
769 if (priority) {
770 d = hashmap_get(e->signal_data, priority);
771 if (d)
772 event_unmask_signal_data(e, d, sig);
773 }
774
775 if (e->signal_sources && e->signal_sources[sig]) {
776 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
777 if (d)
778 event_unmask_signal_data(e, d, sig);
779 }
780
781 d = hashmap_get(e->signal_data, &zero_priority);
782 if (d)
783 event_unmask_signal_data(e, d, sig);
784 }
785
786 static void source_disconnect(sd_event_source *s) {
787 sd_event *event;
788
789 assert(s);
790
791 if (!s->event)
792 return;
793
794 assert(s->event->n_sources > 0);
795
796 switch (s->type) {
797
798 case SOURCE_IO:
799 if (s->io.fd >= 0)
800 source_io_unregister(s);
801
802 break;
803
804 case SOURCE_TIME_REALTIME:
805 case SOURCE_TIME_BOOTTIME:
806 case SOURCE_TIME_MONOTONIC:
807 case SOURCE_TIME_REALTIME_ALARM:
808 case SOURCE_TIME_BOOTTIME_ALARM: {
809 struct clock_data *d;
810
811 d = event_get_clock_data(s->event, s->type);
812 assert(d);
813
814 prioq_remove(d->earliest, s, &s->time.earliest_index);
815 prioq_remove(d->latest, s, &s->time.latest_index);
816 d->needs_rearm = true;
817 break;
818 }
819
820 case SOURCE_SIGNAL:
821 if (s->signal.sig > 0) {
822
823 if (s->event->signal_sources)
824 s->event->signal_sources[s->signal.sig] = NULL;
825
826 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
827 }
828
829 break;
830
831 case SOURCE_CHILD:
832 if (s->child.pid > 0) {
833 if (s->enabled != SD_EVENT_OFF) {
834 assert(s->event->n_enabled_child_sources > 0);
835 s->event->n_enabled_child_sources--;
836 }
837
838 (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
839 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
840 }
841
842 break;
843
844 case SOURCE_DEFER:
845 /* nothing */
846 break;
847
848 case SOURCE_POST:
849 set_remove(s->event->post_sources, s);
850 break;
851
852 case SOURCE_EXIT:
853 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
854 break;
855
856 default:
857 assert_not_reached("Wut? I shouldn't exist.");
858 }
859
860 if (s->pending)
861 prioq_remove(s->event->pending, s, &s->pending_index);
862
863 if (s->prepare)
864 prioq_remove(s->event->prepare, s, &s->prepare_index);
865
866 event = s->event;
867
868 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
869 s->event = NULL;
870 LIST_REMOVE(sources, event->sources, s);
871 event->n_sources--;
872
873 if (!s->floating)
874 sd_event_unref(event);
875 }
876
877 static void source_free(sd_event_source *s) {
878 assert(s);
879
880 source_disconnect(s);
881 free(s->description);
882 free(s);
883 }
884
885 static int source_set_pending(sd_event_source *s, bool b) {
886 int r;
887
888 assert(s);
889 assert(s->type != SOURCE_EXIT);
890
891 if (s->pending == b)
892 return 0;
893
894 s->pending = b;
895
896 if (b) {
897 s->pending_iteration = s->event->iteration;
898
899 r = prioq_put(s->event->pending, s, &s->pending_index);
900 if (r < 0) {
901 s->pending = false;
902 return r;
903 }
904 } else
905 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
906
907 if (EVENT_SOURCE_IS_TIME(s->type)) {
908 struct clock_data *d;
909
910 d = event_get_clock_data(s->event, s->type);
911 assert(d);
912
913 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
914 prioq_reshuffle(d->latest, s, &s->time.latest_index);
915 d->needs_rearm = true;
916 }
917
918 if (s->type == SOURCE_SIGNAL && !b) {
919 struct signal_data *d;
920
921 d = hashmap_get(s->event->signal_data, &s->priority);
922 if (d && d->current == s)
923 d->current = NULL;
924 }
925
926 return 0;
927 }
928
929 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
930 sd_event_source *s;
931
932 assert(e);
933
934 s = new0(sd_event_source, 1);
935 if (!s)
936 return NULL;
937
938 s->n_ref = 1;
939 s->event = e;
940 s->floating = floating;
941 s->type = type;
942 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
943
944 if (!floating)
945 sd_event_ref(e);
946
947 LIST_PREPEND(sources, e->sources, s);
948 e->n_sources++;
949
950 return s;
951 }
952
953 _public_ int sd_event_add_io(
954 sd_event *e,
955 sd_event_source **ret,
956 int fd,
957 uint32_t events,
958 sd_event_io_handler_t callback,
959 void *userdata) {
960
961 sd_event_source *s;
962 int r;
963
964 assert_return(e, -EINVAL);
965 assert_return(fd >= 0, -EBADF);
966 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
967 assert_return(callback, -EINVAL);
968 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
969 assert_return(!event_pid_changed(e), -ECHILD);
970
971 s = source_new(e, !ret, SOURCE_IO);
972 if (!s)
973 return -ENOMEM;
974
975 s->wakeup = WAKEUP_EVENT_SOURCE;
976 s->io.fd = fd;
977 s->io.events = events;
978 s->io.callback = callback;
979 s->userdata = userdata;
980 s->enabled = SD_EVENT_ON;
981
982 r = source_io_register(s, s->enabled, events);
983 if (r < 0) {
984 source_free(s);
985 return r;
986 }
987
988 if (ret)
989 *ret = s;
990
991 return 0;
992 }
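/* Usage sketch (added for illustration; my_io_handler and fd are
 * hypothetical). Note that passing ret=NULL creates a "floating" source
 * owned by the event loop itself:
 *
 *     static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t k;
 *
 *             k = read(fd, buf, sizeof(buf));
 *             if (k < 0)
 *                     return errno == EAGAIN ? 0 : -errno;
 *
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, fd, EPOLLIN, my_io_handler, NULL);
 *     if (r < 0)
 *             return r;
 */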
993
994 static void initialize_perturb(sd_event *e) {
995 sd_id128_t bootid = {};
996
997 /* When we sleep for longer, we try to realign the wakeup to
998 the same time within each minute/second/250ms, so that
999 events all across the system can be coalesced into a single
1000 CPU wakeup. However, let's take some system-specific
1001 randomness for this value, so that in a network of systems
1002 with synced clocks timer events are distributed a
1003 bit. Here, we calculate a perturbation usec offset from the
1004 boot ID. */
1005
1006 if (_likely_(e->perturb != USEC_INFINITY))
1007 return;
1008
1009 if (sd_id128_get_boot(&bootid) >= 0)
1010 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1011 }
1012
1013 static int event_setup_timer_fd(
1014 sd_event *e,
1015 struct clock_data *d,
1016 clockid_t clock) {
1017
1018 struct epoll_event ev = {};
1019 int r, fd;
1020
1021 assert(e);
1022 assert(d);
1023
1024 if (_likely_(d->fd >= 0))
1025 return 0;
1026
1027 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1028 if (fd < 0)
1029 return -errno;
1030
1031 ev.events = EPOLLIN;
1032 ev.data.ptr = d;
1033
1034 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1035 if (r < 0) {
1036 safe_close(fd);
1037 return -errno;
1038 }
1039
1040 d->fd = fd;
1041 return 0;
1042 }
1043
1044 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1045 assert(s);
1046
1047 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1048 }
1049
1050 _public_ int sd_event_add_time(
1051 sd_event *e,
1052 sd_event_source **ret,
1053 clockid_t clock,
1054 uint64_t usec,
1055 uint64_t accuracy,
1056 sd_event_time_handler_t callback,
1057 void *userdata) {
1058
1059 EventSourceType type;
1060 sd_event_source *s;
1061 struct clock_data *d;
1062 int r;
1063
1064 assert_return(e, -EINVAL);
1065 assert_return(usec != (uint64_t) -1, -EINVAL);
1066 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1067 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1068 assert_return(!event_pid_changed(e), -ECHILD);
1069
1070 if (!callback)
1071 callback = time_exit_callback;
1072
1073 type = clock_to_event_source_type(clock);
1074 assert_return(type >= 0, -EOPNOTSUPP);
1075
1076 d = event_get_clock_data(e, type);
1077 assert(d);
1078
1079 if (!d->earliest) {
1080 d->earliest = prioq_new(earliest_time_prioq_compare);
1081 if (!d->earliest)
1082 return -ENOMEM;
1083 }
1084
1085 if (!d->latest) {
1086 d->latest = prioq_new(latest_time_prioq_compare);
1087 if (!d->latest)
1088 return -ENOMEM;
1089 }
1090
1091 if (d->fd < 0) {
1092 r = event_setup_timer_fd(e, d, clock);
1093 if (r < 0)
1094 return r;
1095 }
1096
1097 s = source_new(e, !ret, type);
1098 if (!s)
1099 return -ENOMEM;
1100
1101 s->time.next = usec;
1102 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1103 s->time.callback = callback;
1104 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1105 s->userdata = userdata;
1106 s->enabled = SD_EVENT_ONESHOT;
1107
1108 d->needs_rearm = true;
1109
1110 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1111 if (r < 0)
1112 goto fail;
1113
1114 r = prioq_put(d->latest, s, &s->time.latest_index);
1115 if (r < 0)
1116 goto fail;
1117
1118 if (ret)
1119 *ret = s;
1120
1121 return 0;
1122
1123 fail:
1124 source_free(s);
1125 return r;
1126 }
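/* Usage sketch (added for illustration; my_time_handler is hypothetical):
 * arm a one-shot timer 5s from now on the monotonic clock with 100ms of
 * coalescing slack, using the now() helper from time-util.h:
 *
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC,
 *                           100 * USEC_PER_MSEC,
 *                           my_time_handler, NULL);
 *
 * An accuracy of 0 selects DEFAULT_ACCURACY_USEC (250ms, see above); pass
 * 1 to request the most precise wakeup the loop can provide. */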
1127
1128 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1129 assert(s);
1130
1131 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1132 }
1133
1134 _public_ int sd_event_add_signal(
1135 sd_event *e,
1136 sd_event_source **ret,
1137 int sig,
1138 sd_event_signal_handler_t callback,
1139 void *userdata) {
1140
1141 sd_event_source *s;
1142 struct signal_data *d;
1143 sigset_t ss;
1144 int r;
1145
1146 assert_return(e, -EINVAL);
1147 assert_return(sig > 0, -EINVAL);
1148 assert_return(sig < _NSIG, -EINVAL);
1149 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1150 assert_return(!event_pid_changed(e), -ECHILD);
1151
1152 if (!callback)
1153 callback = signal_exit_callback;
1154
1155 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1156 if (r != 0) /* pthread_sigmask() returns the error directly, errno is not set */
1157 return -r;
1158
1159 if (!sigismember(&ss, sig))
1160 return -EBUSY;
1161
1162 if (!e->signal_sources) {
1163 e->signal_sources = new0(sd_event_source*, _NSIG);
1164 if (!e->signal_sources)
1165 return -ENOMEM;
1166 } else if (e->signal_sources[sig])
1167 return -EBUSY;
1168
1169 s = source_new(e, !ret, SOURCE_SIGNAL);
1170 if (!s)
1171 return -ENOMEM;
1172
1173 s->signal.sig = sig;
1174 s->signal.callback = callback;
1175 s->userdata = userdata;
1176 s->enabled = SD_EVENT_ON;
1177
1178 e->signal_sources[sig] = s;
1179
1180 r = event_make_signal_data(e, sig, &d);
1181 if (r < 0) {
1182 source_free(s);
1183 return r;
1184 }
1185
1186 /* Use the signal name as description for the event source by default */
1187 (void) sd_event_source_set_description(s, signal_to_string(sig));
1188
1189 if (ret)
1190 *ret = s;
1191
1192 return 0;
1193 }
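/* Usage sketch (added for illustration): the signal must already be
 * blocked in the calling thread, otherwise -EBUSY is returned above. With
 * callback=NULL, signal_exit_callback() is installed, so the signal simply
 * terminates the loop:
 *
 *     sigset_t ss;
 *
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     if (sigprocmask(SIG_BLOCK, &ss, NULL) < 0)
 *             return -errno;
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */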
1194
1195 _public_ int sd_event_add_child(
1196 sd_event *e,
1197 sd_event_source **ret,
1198 pid_t pid,
1199 int options,
1200 sd_event_child_handler_t callback,
1201 void *userdata) {
1202
1203 sd_event_source *s;
1204 int r;
1205
1206 assert_return(e, -EINVAL);
1207 assert_return(pid > 1, -EINVAL);
1208 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1209 assert_return(options != 0, -EINVAL);
1210 assert_return(callback, -EINVAL);
1211 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1212 assert_return(!event_pid_changed(e), -ECHILD);
1213
1214 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1215 if (r < 0)
1216 return r;
1217
1218 if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
1219 return -EBUSY;
1220
1221 s = source_new(e, !ret, SOURCE_CHILD);
1222 if (!s)
1223 return -ENOMEM;
1224
1225 s->child.pid = pid;
1226 s->child.options = options;
1227 s->child.callback = callback;
1228 s->userdata = userdata;
1229 s->enabled = SD_EVENT_ONESHOT;
1230
1231 r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
1232 if (r < 0) {
1233 source_free(s);
1234 return r;
1235 }
1236
1237 e->n_enabled_child_sources++;
1238
1239 r = event_make_signal_data(e, SIGCHLD, NULL);
1240 if (r < 0) {
1241 e->n_enabled_child_sources--;
1242 source_free(s);
1243 return r;
1244 }
1245
1246 e->need_process_child = true;
1247
1248 if (ret)
1249 *ret = s;
1250
1251 return 0;
1252 }
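/* Usage sketch (added for illustration; my_child_handler is hypothetical):
 * SIGCHLD must be blocked before forking, so that it is queued on the
 * signalfd rather than delivered asynchronously; sigprocmask_many() is the
 * helper from signal-util.h:
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *
 *     pid = fork();
 *     if (pid < 0)
 *             return -errno;
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *
 *     r = sd_event_add_child(e, &source, pid, WEXITED, my_child_handler, NULL);
 */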
1253
1254 _public_ int sd_event_add_defer(
1255 sd_event *e,
1256 sd_event_source **ret,
1257 sd_event_handler_t callback,
1258 void *userdata) {
1259
1260 sd_event_source *s;
1261 int r;
1262
1263 assert_return(e, -EINVAL);
1264 assert_return(callback, -EINVAL);
1265 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1266 assert_return(!event_pid_changed(e), -ECHILD);
1267
1268 s = source_new(e, !ret, SOURCE_DEFER);
1269 if (!s)
1270 return -ENOMEM;
1271
1272 s->defer.callback = callback;
1273 s->userdata = userdata;
1274 s->enabled = SD_EVENT_ONESHOT;
1275
1276 r = source_set_pending(s, true);
1277 if (r < 0) {
1278 source_free(s);
1279 return r;
1280 }
1281
1282 if (ret)
1283 *ret = s;
1284
1285 return 0;
1286 }
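/* Usage sketch (added for illustration; my_defer_handler is hypothetical):
 * a defer source is immediately marked pending and dispatched on the next
 * iteration; since it defaults to SD_EVENT_ONESHOT it then turns itself
 * off unless re-enabled:
 *
 *     r = sd_event_add_defer(e, NULL, my_defer_handler, NULL);
 */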
1287
1288 _public_ int sd_event_add_post(
1289 sd_event *e,
1290 sd_event_source **ret,
1291 sd_event_handler_t callback,
1292 void *userdata) {
1293
1294 sd_event_source *s;
1295 int r;
1296
1297 assert_return(e, -EINVAL);
1298 assert_return(callback, -EINVAL);
1299 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1300 assert_return(!event_pid_changed(e), -ECHILD);
1301
1302 r = set_ensure_allocated(&e->post_sources, NULL);
1303 if (r < 0)
1304 return r;
1305
1306 s = source_new(e, !ret, SOURCE_POST);
1307 if (!s)
1308 return -ENOMEM;
1309
1310 s->post.callback = callback;
1311 s->userdata = userdata;
1312 s->enabled = SD_EVENT_ON;
1313
1314 r = set_put(e->post_sources, s);
1315 if (r < 0) {
1316 source_free(s);
1317 return r;
1318 }
1319
1320 if (ret)
1321 *ret = s;
1322
1323 return 0;
1324 }
1325
1326 _public_ int sd_event_add_exit(
1327 sd_event *e,
1328 sd_event_source **ret,
1329 sd_event_handler_t callback,
1330 void *userdata) {
1331
1332 sd_event_source *s;
1333 int r;
1334
1335 assert_return(e, -EINVAL);
1336 assert_return(callback, -EINVAL);
1337 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1338 assert_return(!event_pid_changed(e), -ECHILD);
1339
1340 if (!e->exit) {
1341 e->exit = prioq_new(exit_prioq_compare);
1342 if (!e->exit)
1343 return -ENOMEM;
1344 }
1345
1346 s = source_new(e, !ret, SOURCE_EXIT);
1347 if (!s)
1348 return -ENOMEM;
1349
1350 s->exit.callback = callback;
1351 s->userdata = userdata;
1352 s->exit.prioq_index = PRIOQ_IDX_NULL;
1353 s->enabled = SD_EVENT_ONESHOT;
1354
1355 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1356 if (r < 0) {
1357 source_free(s);
1358 return r;
1359 }
1360
1361 if (ret)
1362 *ret = s;
1363
1364 return 0;
1365 }
1366
1367 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1368 assert_return(s, NULL);
1369
1370 assert(s->n_ref >= 1);
1371 s->n_ref++;
1372
1373 return s;
1374 }
1375
1376 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1377
1378 if (!s)
1379 return NULL;
1380
1381 assert(s->n_ref >= 1);
1382 s->n_ref--;
1383
1384 if (s->n_ref <= 0) {
1385 /* Here's a special hack: when we are called from a
1386 * dispatch handler we won't free the event source
1387 * immediately, but we will detach the fd from the
1388 * epoll. This way it is safe for the caller to unref
1389 * the event source and immediately close the fd, but
1390 * we still retain a valid event source object after
1391 * the callback. */
1392
1393 if (s->dispatching) {
1394 if (s->type == SOURCE_IO)
1395 source_io_unregister(s);
1396
1397 source_disconnect(s);
1398 } else
1399 source_free(s);
1400 }
1401
1402 return NULL;
1403 }
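/* Example of the pattern enabled by the hack above (added for
 * illustration): inside a dispatch callback it is safe to drop the last
 * reference and close the fd immediately, because the fd is detached from
 * the epoll before the event source object is eventually freed:
 *
 *     static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             sd_event_source_unref(s);
 *             safe_close(fd);
 *             return 0;
 *     }
 */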
1404
1405 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1406 assert_return(s, -EINVAL);
1407 assert_return(!event_pid_changed(s->event), -ECHILD);
1408
1409 return free_and_strdup(&s->description, description);
1410 }
1411
1412 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1413 assert_return(s, -EINVAL);
1414 assert_return(description, -EINVAL);
1415 assert_return(s->description, -ENXIO);
1416 assert_return(!event_pid_changed(s->event), -ECHILD);
1417
1418 *description = s->description;
1419 return 0;
1420 }
1421
1422 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1423 assert_return(s, NULL);
1424
1425 return s->event;
1426 }
1427
1428 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1429 assert_return(s, -EINVAL);
1430 assert_return(s->type != SOURCE_EXIT, -EDOM);
1431 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1432 assert_return(!event_pid_changed(s->event), -ECHILD);
1433
1434 return s->pending;
1435 }
1436
1437 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1438 assert_return(s, -EINVAL);
1439 assert_return(s->type == SOURCE_IO, -EDOM);
1440 assert_return(!event_pid_changed(s->event), -ECHILD);
1441
1442 return s->io.fd;
1443 }
1444
1445 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1446 int r;
1447
1448 assert_return(s, -EINVAL);
1449 assert_return(fd >= 0, -EBADF);
1450 assert_return(s->type == SOURCE_IO, -EDOM);
1451 assert_return(!event_pid_changed(s->event), -ECHILD);
1452
1453 if (s->io.fd == fd)
1454 return 0;
1455
1456 if (s->enabled == SD_EVENT_OFF) {
1457 s->io.fd = fd;
1458 s->io.registered = false;
1459 } else {
1460 int saved_fd;
1461
1462 saved_fd = s->io.fd;
1463 assert(s->io.registered);
1464
1465 s->io.fd = fd;
1466 s->io.registered = false;
1467
1468 r = source_io_register(s, s->enabled, s->io.events);
1469 if (r < 0) {
1470 s->io.fd = saved_fd;
1471 s->io.registered = true;
1472 return r;
1473 }
1474
1475 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1476 }
1477
1478 return 0;
1479 }
1480
1481 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1482 assert_return(s, -EINVAL);
1483 assert_return(events, -EINVAL);
1484 assert_return(s->type == SOURCE_IO, -EDOM);
1485 assert_return(!event_pid_changed(s->event), -ECHILD);
1486
1487 *events = s->io.events;
1488 return 0;
1489 }
1490
1491 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1492 int r;
1493
1494 assert_return(s, -EINVAL);
1495 assert_return(s->type == SOURCE_IO, -EDOM);
1496 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1497 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1498 assert_return(!event_pid_changed(s->event), -ECHILD);
1499
1500 /* edge-triggered updates are never skipped, so we can reset edges */
1501 if (s->io.events == events && !(events & EPOLLET))
1502 return 0;
1503
1504 if (s->enabled != SD_EVENT_OFF) {
1505 r = source_io_register(s, s->enabled, events);
1506 if (r < 0)
1507 return r;
1508 }
1509
1510 s->io.events = events;
1511 source_set_pending(s, false);
1512
1513 return 0;
1514 }
1515
1516 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1517 assert_return(s, -EINVAL);
1518 assert_return(revents, -EINVAL);
1519 assert_return(s->type == SOURCE_IO, -EDOM);
1520 assert_return(s->pending, -ENODATA);
1521 assert_return(!event_pid_changed(s->event), -ECHILD);
1522
1523 *revents = s->io.revents;
1524 return 0;
1525 }
1526
1527 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1528 assert_return(s, -EINVAL);
1529 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1530 assert_return(!event_pid_changed(s->event), -ECHILD);
1531
1532 return s->signal.sig;
1533 }
1534
1535 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1536 assert_return(s, -EINVAL);
1537 assert_return(!event_pid_changed(s->event), -ECHILD);
1538 *priority = s->priority;
1539 return 0;
1540 }
1541
1542 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1543 int r;
1544
1545 assert_return(s, -EINVAL);
1546 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1547 assert_return(!event_pid_changed(s->event), -ECHILD);
1548
1549 if (s->priority == priority)
1550 return 0;
1551
1552 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1553 struct signal_data *old, *d;
1554
1555 /* Move us from the signalfd belonging to the old
1556 * priority to the signalfd of the new priority */
1557
1558 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1559
1560 s->priority = priority;
1561
1562 r = event_make_signal_data(s->event, s->signal.sig, &d);
1563 if (r < 0) {
1564 s->priority = old->priority;
1565 return r;
1566 }
1567
1568 event_unmask_signal_data(s->event, old, s->signal.sig);
1569 } else
1570 s->priority = priority;
1571
1572 if (s->pending)
1573 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1574
1575 if (s->prepare)
1576 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1577
1578 if (s->type == SOURCE_EXIT)
1579 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1580
1581 return 0;
1582 }
1583
1584 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1585 assert_return(s, -EINVAL);
1586 assert_return(m, -EINVAL);
1587 assert_return(!event_pid_changed(s->event), -ECHILD);
1588
1589 *m = s->enabled;
1590 return 0;
1591 }
1592
1593 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1594 int r;
1595
1596 assert_return(s, -EINVAL);
1597 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1598 assert_return(!event_pid_changed(s->event), -ECHILD);
1599
1600 /* If we are dead anyway, we are fine with turning off
1601 * sources, but everything else needs to fail. */
1602 if (s->event->state == SD_EVENT_FINISHED)
1603 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1604
1605 if (s->enabled == m)
1606 return 0;
1607
1608 if (m == SD_EVENT_OFF) {
1609
1610 switch (s->type) {
1611
1612 case SOURCE_IO:
1613 source_io_unregister(s);
1614 s->enabled = m;
1615 break;
1616
1617 case SOURCE_TIME_REALTIME:
1618 case SOURCE_TIME_BOOTTIME:
1619 case SOURCE_TIME_MONOTONIC:
1620 case SOURCE_TIME_REALTIME_ALARM:
1621 case SOURCE_TIME_BOOTTIME_ALARM: {
1622 struct clock_data *d;
1623
1624 s->enabled = m;
1625 d = event_get_clock_data(s->event, s->type);
1626 assert(d);
1627
1628 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1629 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1630 d->needs_rearm = true;
1631 break;
1632 }
1633
1634 case SOURCE_SIGNAL:
1635 s->enabled = m;
1636
1637 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1638 break;
1639
1640 case SOURCE_CHILD:
1641 s->enabled = m;
1642
1643 assert(s->event->n_enabled_child_sources > 0);
1644 s->event->n_enabled_child_sources--;
1645
1646 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1647 break;
1648
1649 case SOURCE_EXIT:
1650 s->enabled = m;
1651 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1652 break;
1653
1654 case SOURCE_DEFER:
1655 case SOURCE_POST:
1656 s->enabled = m;
1657 break;
1658
1659 default:
1660 assert_not_reached("Wut? I shouldn't exist.");
1661 }
1662
1663 } else {
1664 switch (s->type) {
1665
1666 case SOURCE_IO:
1667 r = source_io_register(s, m, s->io.events);
1668 if (r < 0)
1669 return r;
1670
1671 s->enabled = m;
1672 break;
1673
1674 case SOURCE_TIME_REALTIME:
1675 case SOURCE_TIME_BOOTTIME:
1676 case SOURCE_TIME_MONOTONIC:
1677 case SOURCE_TIME_REALTIME_ALARM:
1678 case SOURCE_TIME_BOOTTIME_ALARM: {
1679 struct clock_data *d;
1680
1681 s->enabled = m;
1682 d = event_get_clock_data(s->event, s->type);
1683 assert(d);
1684
1685 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1686 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1687 d->needs_rearm = true;
1688 break;
1689 }
1690
1691 case SOURCE_SIGNAL:
1692
1693 s->enabled = m;
1694
1695 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1696 if (r < 0) {
1697 s->enabled = SD_EVENT_OFF;
1698 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1699 return r;
1700 }
1701
1702 break;
1703
1704 case SOURCE_CHILD:
1705
1706 if (s->enabled == SD_EVENT_OFF)
1707 s->event->n_enabled_child_sources++;
1708
1709 s->enabled = m;
1710
1711 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1712 if (r < 0) {
1713 s->enabled = SD_EVENT_OFF;
1714 s->event->n_enabled_child_sources--;
1715 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1716 return r;
1717 }
1718
1719 break;
1720
1721 case SOURCE_EXIT:
1722 s->enabled = m;
1723 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1724 break;
1725
1726 case SOURCE_DEFER:
1727 case SOURCE_POST:
1728 s->enabled = m;
1729 break;
1730
1731 default:
1732 assert_not_reached("Wut? I shouldn't exist.");
1733 }
1734 }
1735
1736 if (s->pending)
1737 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1738
1739 if (s->prepare)
1740 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1741
1742 return 0;
1743 }
1744
1745 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1746 assert_return(s, -EINVAL);
1747 assert_return(usec, -EINVAL);
1748 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1749 assert_return(!event_pid_changed(s->event), -ECHILD);
1750
1751 *usec = s->time.next;
1752 return 0;
1753 }
1754
1755 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1756 struct clock_data *d;
1757
1758 assert_return(s, -EINVAL);
1759 assert_return(usec != (uint64_t) -1, -EINVAL);
1760 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1761 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1762 assert_return(!event_pid_changed(s->event), -ECHILD);
1763
1764 s->time.next = usec;
1765
1766 source_set_pending(s, false);
1767
1768 d = event_get_clock_data(s->event, s->type);
1769 assert(d);
1770
1771 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1772 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1773 d->needs_rearm = true;
1774
1775 return 0;
1776 }
1777
1778 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1779 assert_return(s, -EINVAL);
1780 assert_return(usec, -EINVAL);
1781 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1782 assert_return(!event_pid_changed(s->event), -ECHILD);
1783
1784 *usec = s->time.accuracy;
1785 return 0;
1786 }
1787
1788 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1789 struct clock_data *d;
1790
1791 assert_return(s, -EINVAL);
1792 assert_return(usec != (uint64_t) -1, -EINVAL);
1793 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1794 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1795 assert_return(!event_pid_changed(s->event), -ECHILD);
1796
1797 if (usec == 0)
1798 usec = DEFAULT_ACCURACY_USEC;
1799
1800 s->time.accuracy = usec;
1801
1802 source_set_pending(s, false);
1803
1804 d = event_get_clock_data(s->event, s->type);
1805 assert(d);
1806
1807 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1808 d->needs_rearm = true;
1809
1810 return 0;
1811 }
1812
1813 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1814 assert_return(s, -EINVAL);
1815 assert_return(clock, -EINVAL);
1816 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1817 assert_return(!event_pid_changed(s->event), -ECHILD);
1818
1819 *clock = event_source_type_to_clock(s->type);
1820 return 0;
1821 }
1822
1823 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1824 assert_return(s, -EINVAL);
1825 assert_return(pid, -EINVAL);
1826 assert_return(s->type == SOURCE_CHILD, -EDOM);
1827 assert_return(!event_pid_changed(s->event), -ECHILD);
1828
1829 *pid = s->child.pid;
1830 return 0;
1831 }
1832
1833 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1834 int r;
1835
1836 assert_return(s, -EINVAL);
1837 assert_return(s->type != SOURCE_EXIT, -EDOM);
1838 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1839 assert_return(!event_pid_changed(s->event), -ECHILD);
1840
1841 if (s->prepare == callback)
1842 return 0;
1843
1844 if (callback && s->prepare) {
1845 s->prepare = callback;
1846 return 0;
1847 }
1848
1849 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1850 if (r < 0)
1851 return r;
1852
1853 s->prepare = callback;
1854
1855 if (callback) {
1856 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1857 if (r < 0)
1858 return r;
1859 } else
1860 prioq_remove(s->event->prepare, s, &s->prepare_index);
1861
1862 return 0;
1863 }
1864
1865 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1866 assert_return(s, NULL);
1867
1868 return s->userdata;
1869 }
1870
1871 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1872 void *ret;
1873
1874 assert_return(s, NULL);
1875
1876 ret = s->userdata;
1877 s->userdata = userdata;
1878
1879 return ret;
1880 }
1881
1882 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1883 usec_t c;
1884 assert(e);
1885 assert(a <= b);
1886
1887 if (a <= 0)
1888 return 0;
1889
1890 if (b <= a + 1)
1891 return a;
1892
1893 initialize_perturb(e);
1894
1895 /*
1896 Find a good time to wake up again between times a and b. We
1897 have two goals here:
1898
1899 a) We want to wake up as seldom as possible, hence prefer
1900 later times over earlier times.
1901
1902 b) But if we have to wake up, then let's make sure to
1903 dispatch as much as possible on the entire system.
1904
1905 We implement this by waking up everywhere at the same time
1906 within any given minute if we can, synchronised via the
1907 perturbation value determined from the boot ID. If we can't,
1908 then we try to find the same spot within every 10s, then every
1909 1s and then every 250ms step. Otherwise, we pick the last
1910 possible time to wake up.
1911 */
1912
1913 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1914 if (c >= b) {
1915 if (_unlikely_(c < USEC_PER_MINUTE))
1916 return b;
1917
1918 c -= USEC_PER_MINUTE;
1919 }
1920
1921 if (c >= a)
1922 return c;
1923
1924 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1925 if (c >= b) {
1926 if (_unlikely_(c < USEC_PER_SEC*10))
1927 return b;
1928
1929 c -= USEC_PER_SEC*10;
1930 }
1931
1932 if (c >= a)
1933 return c;
1934
1935 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1936 if (c >= b) {
1937 if (_unlikely_(c < USEC_PER_SEC))
1938 return b;
1939
1940 c -= USEC_PER_SEC;
1941 }
1942
1943 if (c >= a)
1944 return c;
1945
1946 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1947 if (c >= b) {
1948 if (_unlikely_(c < USEC_PER_MSEC*250))
1949 return b;
1950
1951 c -= USEC_PER_MSEC*250;
1952 }
1953
1954 if (c >= a)
1955 return c;
1956
1957 return b;
1958 }
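/* Worked example (added for illustration): with perturb=17s and a window
 * of a=8:00:30, b=8:02:10, the minute-granularity candidate is
 * c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + perturb = 8:02:17. That is
 * past b, so one minute is subtracted, giving c = 8:01:17, which lies in
 * [a, b] and is returned; every machine deriving the same perturbation
 * from its boot ID wakes at that same second. Had c fallen before a, the
 * same computation would be retried at 10s, 1s and 250ms granularity. */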
1959
1960 static int event_arm_timer(
1961 sd_event *e,
1962 struct clock_data *d) {
1963
1964 struct itimerspec its = {};
1965 sd_event_source *a, *b;
1966 usec_t t;
1967 int r;
1968
1969 assert(e);
1970 assert(d);
1971
1972 if (!d->needs_rearm)
1973 return 0;
1974 else
1975 d->needs_rearm = false;
1976
1977 a = prioq_peek(d->earliest);
1978 if (!a || a->enabled == SD_EVENT_OFF) {
1979
1980 if (d->fd < 0)
1981 return 0;
1982
1983 if (d->next == USEC_INFINITY)
1984 return 0;
1985
1986 /* disarm */
1987 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1988 if (r < 0)
1989 return -errno;
1990
1991 d->next = USEC_INFINITY;
1992 return 0;
1993 }
1994
1995 b = prioq_peek(d->latest);
1996 assert_se(b && b->enabled != SD_EVENT_OFF);
1997
1998 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1999 if (d->next == t)
2000 return 0;
2001
2002 assert_se(d->fd >= 0);
2003
2004 if (t == 0) {
2005 /* We don't want to disarm here, just to set some time looooong ago. */
2006 its.it_value.tv_sec = 0;
2007 its.it_value.tv_nsec = 1;
2008 } else
2009 timespec_store(&its.it_value, t);
2010
2011 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2012 if (r < 0)
2013 return -errno;
2014
2015 d->next = t;
2016 return 0;
2017 }
2018
2019 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2020 assert(e);
2021 assert(s);
2022 assert(s->type == SOURCE_IO);
2023
2024 /* If the event source was already pending, we just OR in the
2025 * new revents, otherwise we reset the value. The ORing is
2026 * necessary to handle EPOLLONESHOT events properly where
2027 * readability might happen independently of writability, and
2028 * we need to keep track of both */
2029
2030 if (s->pending)
2031 s->io.revents |= revents;
2032 else
2033 s->io.revents = revents;
2034
2035 return source_set_pending(s, true);
2036 }
2037
2038 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2039 uint64_t x;
2040 ssize_t ss;
2041
2042 assert(e);
2043 assert(fd >= 0);
2044
2045 assert_return(events == EPOLLIN, -EIO);
2046
2047 ss = read(fd, &x, sizeof(x));
2048 if (ss < 0) {
2049 if (errno == EAGAIN || errno == EINTR)
2050 return 0;
2051
2052 return -errno;
2053 }
2054
2055 if (_unlikely_(ss != sizeof(x)))
2056 return -EIO;
2057
2058 if (next)
2059 *next = USEC_INFINITY;
2060
2061 return 0;
2062 }
2063
2064 static int process_timer(
2065 sd_event *e,
2066 usec_t n,
2067 struct clock_data *d) {
2068
2069 sd_event_source *s;
2070 int r;
2071
2072 assert(e);
2073 assert(d);
2074
2075 for (;;) {
2076 s = prioq_peek(d->earliest);
2077 if (!s ||
2078 s->time.next > n ||
2079 s->enabled == SD_EVENT_OFF ||
2080 s->pending)
2081 break;
2082
2083 r = source_set_pending(s, true);
2084 if (r < 0)
2085 return r;
2086
2087 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2088 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2089 d->needs_rearm = true;
2090 }
2091
2092 return 0;
2093 }
2094
2095 static int process_child(sd_event *e) {
2096 sd_event_source *s;
2097 Iterator i;
2098 int r;
2099
2100 assert(e);
2101
2102 e->need_process_child = false;
2103
2104 /*
2105 So, this is ugly. We iteratively invoke waitid() with P_PID
2106 + WNOHANG for each PID we wait for, instead of using
2107 P_ALL. This is because we only want to get child
2108 information of very specific child processes, and not all
2109 of them. We might not have processed the SIGCHLD event of a
2110 previous invocation and we don't want to maintain an
2111 unbounded *per-child* event queue, hence we really don't
2112 want anything flushed out of the kernel's queue that we
2113 don't care about. Since this is O(n) this means that if you
2114 have a lot of processes you probably want to handle SIGCHLD
2115 yourself.
2116
2117 We do not reap the children here (by using WNOWAIT); that
2118 is only done after the event source is dispatched, so that
2119 the callback still sees the process as a zombie.
2120 */
2121
2122 HASHMAP_FOREACH(s, e->child_sources, i) {
2123 assert(s->type == SOURCE_CHILD);
2124
2125 if (s->pending)
2126 continue;
2127
2128 if (s->enabled == SD_EVENT_OFF)
2129 continue;
2130
2131 zero(s->child.siginfo);
2132 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2133 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2134 if (r < 0)
2135 return -errno;
2136
2137 if (s->child.siginfo.si_pid != 0) {
2138 bool zombie =
2139 s->child.siginfo.si_code == CLD_EXITED ||
2140 s->child.siginfo.si_code == CLD_KILLED ||
2141 s->child.siginfo.si_code == CLD_DUMPED;
2142
2143 if (!zombie && (s->child.options & WEXITED)) {
2144 /* If the child isn't dead then let's
2145 * immediately remove the state change
2146 * from the queue, since there's no
2147 * benefit in leaving it queued */
2148
2149 assert(s->child.options & (WSTOPPED|WCONTINUED));
2150 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2151 }
2152
2153 r = source_set_pending(s, true);
2154 if (r < 0)
2155 return r;
2156 }
2157 }
2158
2159 return 0;
2160 }
2161
2162 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2163 bool read_one = false;
2164 int r;
2165
2166 assert(e);
2167 assert_return(events == EPOLLIN, -EIO);
2168
2169 /* If there's a signal queued on this priority and SIGCHLD is
2170 on this priority too, then make sure to recheck the
2171 children we watch. This is because we only ever dequeue
2172 the first signal per priority, so if we dequeue one and
2173 SIGCHLD is enqueued behind it we wouldn't notice it, even
2174 though we might care about one of the children it is
2175 about; hence, recheck the children explicitly here. */
2176
2177 if (sigismember(&d->sigset, SIGCHLD))
2178 e->need_process_child = true;
2179
2180 /* If there's already an event source pending for this
2181 * priority we don't read another */
2182 if (d->current)
2183 return 0;
2184
2185 for (;;) {
2186 struct signalfd_siginfo si;
2187 ssize_t n;
2188 sd_event_source *s = NULL;
2189
2190 n = read(d->fd, &si, sizeof(si));
2191 if (n < 0) {
2192 if (errno == EAGAIN || errno == EINTR)
2193 return read_one;
2194
2195 return -errno;
2196 }
2197
2198 if (_unlikely_(n != sizeof(si)))
2199 return -EIO;
2200
2201 assert(si.ssi_signo < _NSIG);
2202
2203 read_one = true;
2204
2205 if (e->signal_sources)
2206 s = e->signal_sources[si.ssi_signo];
2207 if (!s)
2208 continue;
2209 if (s->pending)
2210 continue;
2211
2212 s->signal.siginfo = si;
2213 d->current = s;
2214
2215 r = source_set_pending(s, true);
2216 if (r < 0)
2217 return r;
2218
2219 return 1;
2220 }
2221 }
2222
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
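
/* Seen from a handler's side, the error convention above means a
 * callback can retire its own source simply by failing, e.g. (minimal
 * sketch, on_io() being a hypothetical handler):
 *
 * static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *         char buf[256];
 *         ssize_t n;
 *
 *         n = read(fd, buf, sizeof(buf));
 *         if (n < 0)
 *                 return errno == EAGAIN ? 0 : -errno;
 *
 *         return 0;
 * }
 *
 * Returning the negative errno here gets the source disabled rather
 * than stopping the loop. */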

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;

        r = source_dispatch(p);

        e->state = SD_EVENT_INITIAL;
        sd_event_unref(e);

        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

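/* Watchdog handling: if the service manager supervises us with
 * WatchdogSec= (cf. sd_watchdog_enabled()), the event loop emits the
 * WATCHDOG=1 keep-alive notifications by itself: a busy loop pings at
 * most every quarter period (see process_watchdog() below), and a
 * timerfd armed between one half and three quarters of the period
 * wakes up an otherwise idle loop in time. For example, with
 * WatchdogSec=20s the next ping happens no earlier than 5s after the
 * previous one, and at the latest around 15s after it. */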
static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

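/* The three public entry points below implement one iteration of the
 * loop as a small state machine: sd_event_prepare() takes the loop from
 * SD_EVENT_INITIAL to SD_EVENT_ARMED (or straight to SD_EVENT_PENDING
 * if something is ready), sd_event_wait() takes ARMED to PENDING (or
 * back to INITIAL if nothing fired), and sd_event_dispatch() runs one
 * pending source, passing through SD_EVENT_RUNNING and returning to
 * INITIAL. */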
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        /* Let sd_event_wait() probe with a zero timeout so that we end
         * up in SD_EVENT_PENDING right away; if nothing turns out to
         * be pending after all, stay armed. */
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                sd_event_ref(e);

                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;

                sd_event_unref(e);

                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

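/* A caller driving the loop by hand would combine the three calls
 * above roughly like this (minimal sketch, error handling omitted):
 *
 *         for (;;) {
 *                 r = sd_event_prepare(e);
 *                 if (r == 0)
 *                         r = sd_event_wait(e, (uint64_t) -1);
 *                 if (r > 0)
 *                         sd_event_dispatch(e);
 *         }
 *
 * which is essentially what sd_event_run() below does for a single
 * iteration. */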
_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

_public_ int sd_event_loop(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        goto finish;
        }

        r = e->exit_code;

finish:
        sd_event_unref(e);
        return r;
}
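
/* Minimal usage sketch (on_defer() being a hypothetical handler, error
 * handling omitted):
 *
 *         sd_event *e = NULL;
 *
 *         sd_event_default(&e);
 *         sd_event_add_defer(e, NULL, on_defer, NULL);
 *         sd_event_loop(e);
 *         sd_event_unref(e);
 */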

_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}
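
/* The returned epoll fd is intended for embedding this loop into a
 * foreign poll loop: watch it for POLLIN, and when it fires run one
 * iteration with a zero timeout, e.g. (sketch):
 *
 *         struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };
 *
 *         poll(&p, 1, -1);
 *         sd_event_run(e, 0);
 */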

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}
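
/* Typical use is to request a clean shutdown from within a handler,
 * e.g. from a SIGTERM source (minimal sketch, on_sigterm() being
 * hypothetical; note that the signal must be blocked before adding the
 * source):
 *
 * static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *         return sd_event_exit(sd_event_source_get_event(s), 0);
 * }
 *
 *         ...
 *         sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 */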

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!dual_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        switch (clock) {

        case CLOCK_REALTIME:
        case CLOCK_REALTIME_ALARM:
                *usec = e->timestamp.realtime;
                break;

        case CLOCK_MONOTONIC:
                *usec = e->timestamp.monotonic;
                break;

        case CLOCK_BOOTTIME:
        case CLOCK_BOOTTIME_ALARM:
                *usec = e->timestamp_boottime;
                break;

        default:
                /* Refuse unknown clocks instead of returning
                 * uninitialized data as success. */
                return -EOPNOTSUPP;
        }

        return 0;
}
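
/* The cached timestamp is handy for computing absolute timer expiries
 * relative to the current iteration, e.g. (sketch, on_timer() being a
 * hypothetical handler):
 *
 *         uint64_t t;
 *
 *         sd_event_now(e, CLOCK_MONOTONIC, &t);
 *         sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           t + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 */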

_public_ int sd_event_default(sd_event **ret) {

        static thread_local sd_event *default_event = NULL;
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}
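
/* Note that the default event loop is per-thread: the first caller on
 * a thread allocates it (return value 1), subsequent callers on the
 * same thread get a reference to the same object (return value 0).
 * Every successful call must hence be balanced by sd_event_unref(). */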

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

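/* Enabling the watchdog only has an effect when the service manager
 * actually supervises us, i.e. when the unit sets something like
 * WatchdogSec=20s (so that sd_watchdog_enabled() reports a period). A
 * service would then simply do (sketch):
 *
 *         sd_event_set_watchdog(e, true);
 *
 * and rely on the loop to send the periodic WATCHDOG=1 pings. */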
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}