]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/libsystemd/sd-event/sd-event.c
sd-event: don't provide priority stability
[thirdparty/systemd.git] / src / libsystemd / sd-event / sd-event.c
1 /*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
2
3 /***
4 This file is part of systemd.
5
6 Copyright 2013 Lennart Poettering
7
8 systemd is free software; you can redistribute it and/or modify it
9 under the terms of the GNU Lesser General Public License as published by
10 the Free Software Foundation; either version 2.1 of the License, or
11 (at your option) any later version.
12
13 systemd is distributed in the hope that it will be useful, but
14 WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public License
19 along with systemd; If not, see <http://www.gnu.org/licenses/>.
20 ***/
21
22 #include <sys/epoll.h>
23 #include <sys/timerfd.h>
24 #include <sys/wait.h>
25
26 #include "sd-id128.h"
27 #include "sd-daemon.h"
28 #include "macro.h"
29 #include "prioq.h"
30 #include "hashmap.h"
31 #include "util.h"
32 #include "time-util.h"
33 #include "missing.h"
34 #include "set.h"
35 #include "list.h"
36 #include "signal-util.h"
37
38 #include "sd-event.h"
39
/* Default scheduling slack granted to timer sources that don't specify an accuracy */
#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

/* Discriminates the kind of an event source; determines which member of the
 * payload union in sd_event_source is valid. */
typedef enum EventSourceType {
        SOURCE_IO,
        SOURCE_TIME_REALTIME,
        SOURCE_TIME_BOOTTIME,
        SOURCE_TIME_MONOTONIC,
        SOURCE_TIME_REALTIME_ALARM,
        SOURCE_TIME_BOOTTIME_ALARM,
        SOURCE_SIGNAL,
        SOURCE_CHILD,
        SOURCE_DEFER,
        SOURCE_POST,
        SOURCE_EXIT,
        SOURCE_WATCHDOG,
        _SOURCE_EVENT_SOURCE_TYPE_MAX,
        _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
} EventSourceType;

/* All objects we use in epoll events start with this value, so that
 * we know how to dispatch it */
typedef enum WakeupType {
        WAKEUP_NONE,
        WAKEUP_EVENT_SOURCE,
        WAKEUP_CLOCK_DATA,
        WAKEUP_SIGNAL_DATA,
        _WAKEUP_TYPE_MAX,
        _WAKEUP_TYPE_INVALID = -1,
} WakeupType;

/* True for the five timer source types that are backed by a clock_data/timerfd */
#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event_source {
        WakeupType wakeup;

        unsigned n_ref;

        sd_event *event;
        void *userdata;
        sd_event_handler_t prepare;

        char *description;

        EventSourceType type:5;
        int enabled:3;
        bool pending:1;
        bool dispatching:1;      /* set while the user callback is running */
        bool floating:1;         /* if set, the source pins the loop instead of the loop pinning the source */

        int64_t priority;
        unsigned pending_index;      /* index into the loop's pending prioq */
        unsigned prepare_index;      /* index into the loop's prepare prioq */
        unsigned pending_iteration;  /* loop iteration in which the source became pending */
        unsigned prepare_iteration;  /* loop iteration in which the source was last prepared */

        LIST_FIELDS(sd_event_source, sources);

        /* Per-type payload; which member is valid is determined by 'type' above */
        union {
                struct {
                        sd_event_io_handler_t callback;
                        int fd;
                        uint32_t events;
                        uint32_t revents;
                        bool registered:1;  /* whether fd is currently added to the epoll */
                } io;
                struct {
                        sd_event_time_handler_t callback;
                        usec_t next, accuracy;
                        unsigned earliest_index;
                        unsigned latest_index;
                } time;
                struct {
                        sd_event_signal_handler_t callback;
                        struct signalfd_siginfo siginfo;
                        int sig;
                } signal;
                struct {
                        sd_event_child_handler_t callback;
                        siginfo_t siginfo;
                        pid_t pid;
                        int options;
                } child;
                struct {
                        sd_event_handler_t callback;
                } defer;
                struct {
                        sd_event_handler_t callback;
                } post;
                struct {
                        sd_event_handler_t callback;
                        unsigned prioq_index;
                } exit;
        };
};

struct clock_data {
        WakeupType wakeup;
        int fd;                  /* timerfd for this clock, -1 until first use */

        /* For all clocks we maintain two priority queues each, one
         * ordered for the earliest times the events may be
         * dispatched, and one ordered by the latest times they must
         * have been dispatched. The range between the top entries in
         * the two prioqs is the time window we can freely schedule
         * wakeups in */

        Prioq *earliest;
        Prioq *latest;
        usec_t next;             /* time the timerfd is currently armed for */

        bool needs_rearm:1;      /* set whenever the prioqs changed and the timerfd must be reprogrammed */
};

struct signal_data {
        WakeupType wakeup;

        /* For each priority we maintain one signal fd, so that we
         * only have to dequeue a single event per priority at a
         * time. */

        int fd;
        int64_t priority;        /* also the hashmap key (by address) */
        sigset_t sigset;         /* set of signals this fd watches */
        sd_event_source *current;  /* source whose signal is currently being dispatched */
};

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;          /* boot-id derived offset to decorrelate timers across machines */

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        pid_t original_pid;      /* PID that created the loop; used to detect fork() */

        unsigned iteration;
        dual_timestamp timestamp;
        usec_t timestamp_boottime;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;  /* points at the per-thread default-loop cache, if this is it */

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);
};

static void source_disconnect(sd_event_source *s);
220
221 static int pending_prioq_compare(const void *a, const void *b) {
222 const sd_event_source *x = a, *y = b;
223
224 assert(x->pending);
225 assert(y->pending);
226
227 /* Enabled ones first */
228 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
229 return -1;
230 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
231 return 1;
232
233 /* Lower priority values first */
234 if (x->priority < y->priority)
235 return -1;
236 if (x->priority > y->priority)
237 return 1;
238
239 /* Older entries first */
240 if (x->pending_iteration < y->pending_iteration)
241 return -1;
242 if (x->pending_iteration > y->pending_iteration)
243 return 1;
244
245 return 0;
246 }
247
248 static int prepare_prioq_compare(const void *a, const void *b) {
249 const sd_event_source *x = a, *y = b;
250
251 assert(x->prepare);
252 assert(y->prepare);
253
254 /* Enabled ones first */
255 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
256 return -1;
257 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
258 return 1;
259
260 /* Move most recently prepared ones last, so that we can stop
261 * preparing as soon as we hit one that has already been
262 * prepared in the current iteration */
263 if (x->prepare_iteration < y->prepare_iteration)
264 return -1;
265 if (x->prepare_iteration > y->prepare_iteration)
266 return 1;
267
268 /* Lower priority values first */
269 if (x->priority < y->priority)
270 return -1;
271 if (x->priority > y->priority)
272 return 1;
273
274 return 0;
275 }
276
277 static int earliest_time_prioq_compare(const void *a, const void *b) {
278 const sd_event_source *x = a, *y = b;
279
280 assert(EVENT_SOURCE_IS_TIME(x->type));
281 assert(x->type == y->type);
282
283 /* Enabled ones first */
284 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
285 return -1;
286 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
287 return 1;
288
289 /* Move the pending ones to the end */
290 if (!x->pending && y->pending)
291 return -1;
292 if (x->pending && !y->pending)
293 return 1;
294
295 /* Order by time */
296 if (x->time.next < y->time.next)
297 return -1;
298 if (x->time.next > y->time.next)
299 return 1;
300
301 return 0;
302 }
303
304 static int latest_time_prioq_compare(const void *a, const void *b) {
305 const sd_event_source *x = a, *y = b;
306
307 assert(EVENT_SOURCE_IS_TIME(x->type));
308 assert(x->type == y->type);
309
310 /* Enabled ones first */
311 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
312 return -1;
313 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
314 return 1;
315
316 /* Move the pending ones to the end */
317 if (!x->pending && y->pending)
318 return -1;
319 if (x->pending && !y->pending)
320 return 1;
321
322 /* Order by time */
323 if (x->time.next + x->time.accuracy < y->time.next + y->time.accuracy)
324 return -1;
325 if (x->time.next + x->time.accuracy > y->time.next + y->time.accuracy)
326 return 1;
327
328 return 0;
329 }
330
331 static int exit_prioq_compare(const void *a, const void *b) {
332 const sd_event_source *x = a, *y = b;
333
334 assert(x->type == SOURCE_EXIT);
335 assert(y->type == SOURCE_EXIT);
336
337 /* Enabled ones first */
338 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
339 return -1;
340 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
341 return 1;
342
343 /* Lower priority values first */
344 if (x->priority < y->priority)
345 return -1;
346 if (x->priority > y->priority)
347 return 1;
348
349 return 0;
350 }
351
352 static void free_clock_data(struct clock_data *d) {
353 assert(d);
354 assert(d->wakeup == WAKEUP_CLOCK_DATA);
355
356 safe_close(d->fd);
357 prioq_free(d->earliest);
358 prioq_free(d->latest);
359 }
360
/* Tear down an event loop whose reference count reached zero. */
static void event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        /* Detach any remaining sources. Only floating sources can still be
         * here: non-floating ones hold a reference to the loop, so the
         * refcount could not have dropped to zero while any exist. */
        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        /* If this loop was cached as the per-thread default, clear the cache */
        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);
        free(e);
}
397
/* Allocate a new event loop object with a single reference.
 * Returns 0 on success, -ENOMEM or a negative errno from epoll_create1(). */
_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new0(sd_event, 1);
        if (!e)
                return -ENOMEM;

        e->n_ref = 1;
        /* Mark all fds as "not opened yet" so event_free() may close them unconditionally */
        e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
        e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
        e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
        e->original_pid = getpid(); /* remembered to detect use across fork() */
        e->perturb = USEC_INFINITY; /* computed lazily in initialize_perturb() */

        e->pending = prioq_new(pending_prioq_compare);
        if (!e->pending) {
                r = -ENOMEM;
                goto fail;
        }

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
434
/* Take an additional reference on the event loop. Returns e, or NULL if e is NULL. */
_public_ sd_event* sd_event_ref(sd_event *e) {
        assert_return(e, NULL);

        assert(e->n_ref >= 1);
        e->n_ref++;

        return e;
}
443
444 _public_ sd_event* sd_event_unref(sd_event *e) {
445
446 if (!e)
447 return NULL;
448
449 assert(e->n_ref >= 1);
450 e->n_ref--;
451
452 if (e->n_ref <= 0)
453 event_free(e);
454
455 return NULL;
456 }
457
458 static bool event_pid_changed(sd_event *e) {
459 assert(e);
460
461 /* We don't support people creating an event loop and keeping
462 * it around over a fork(). Let's complain. */
463
464 return e->original_pid != getpid();
465 }
466
467 static void source_io_unregister(sd_event_source *s) {
468 int r;
469
470 assert(s);
471 assert(s->type == SOURCE_IO);
472
473 if (event_pid_changed(s->event))
474 return;
475
476 if (!s->io.registered)
477 return;
478
479 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
480 if (r < 0)
481 log_debug_errno(errno, "Failed to remove source %s from epoll: %m", strna(s->description));
482
483 s->io.registered = false;
484 }
485
486 static int source_io_register(
487 sd_event_source *s,
488 int enabled,
489 uint32_t events) {
490
491 struct epoll_event ev = {};
492 int r;
493
494 assert(s);
495 assert(s->type == SOURCE_IO);
496 assert(enabled != SD_EVENT_OFF);
497
498 ev.events = events;
499 ev.data.ptr = s;
500
501 if (enabled == SD_EVENT_ONESHOT)
502 ev.events |= EPOLLONESHOT;
503
504 if (s->io.registered)
505 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
506 else
507 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
508 if (r < 0)
509 return -errno;
510
511 s->io.registered = true;
512
513 return 0;
514 }
515
516 static clockid_t event_source_type_to_clock(EventSourceType t) {
517
518 switch (t) {
519
520 case SOURCE_TIME_REALTIME:
521 return CLOCK_REALTIME;
522
523 case SOURCE_TIME_BOOTTIME:
524 return CLOCK_BOOTTIME;
525
526 case SOURCE_TIME_MONOTONIC:
527 return CLOCK_MONOTONIC;
528
529 case SOURCE_TIME_REALTIME_ALARM:
530 return CLOCK_REALTIME_ALARM;
531
532 case SOURCE_TIME_BOOTTIME_ALARM:
533 return CLOCK_BOOTTIME_ALARM;
534
535 default:
536 return (clockid_t) -1;
537 }
538 }
539
540 static EventSourceType clock_to_event_source_type(clockid_t clock) {
541
542 switch (clock) {
543
544 case CLOCK_REALTIME:
545 return SOURCE_TIME_REALTIME;
546
547 case CLOCK_BOOTTIME:
548 return SOURCE_TIME_BOOTTIME;
549
550 case CLOCK_MONOTONIC:
551 return SOURCE_TIME_MONOTONIC;
552
553 case CLOCK_REALTIME_ALARM:
554 return SOURCE_TIME_REALTIME_ALARM;
555
556 case CLOCK_BOOTTIME_ALARM:
557 return SOURCE_TIME_BOOTTIME_ALARM;
558
559 default:
560 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
561 }
562 }
563
564 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
565 assert(e);
566
567 switch (t) {
568
569 case SOURCE_TIME_REALTIME:
570 return &e->realtime;
571
572 case SOURCE_TIME_BOOTTIME:
573 return &e->boottime;
574
575 case SOURCE_TIME_MONOTONIC:
576 return &e->monotonic;
577
578 case SOURCE_TIME_REALTIME_ALARM:
579 return &e->realtime_alarm;
580
581 case SOURCE_TIME_BOOTTIME_ALARM:
582 return &e->boottime_alarm;
583
584 default:
585 return NULL;
586 }
587 }
588
589 static int event_make_signal_data(
590 sd_event *e,
591 int sig,
592 struct signal_data **ret) {
593
594 struct epoll_event ev = {};
595 struct signal_data *d;
596 bool added = false;
597 sigset_t ss_copy;
598 int64_t priority;
599 int r;
600
601 assert(e);
602
603 if (event_pid_changed(e))
604 return -ECHILD;
605
606 if (e->signal_sources && e->signal_sources[sig])
607 priority = e->signal_sources[sig]->priority;
608 else
609 priority = 0;
610
611 d = hashmap_get(e->signal_data, &priority);
612 if (d) {
613 if (sigismember(&d->sigset, sig) > 0) {
614 if (ret)
615 *ret = d;
616 return 0;
617 }
618 } else {
619 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
620 if (r < 0)
621 return r;
622
623 d = new0(struct signal_data, 1);
624 if (!d)
625 return -ENOMEM;
626
627 d->wakeup = WAKEUP_SIGNAL_DATA;
628 d->fd = -1;
629 d->priority = priority;
630
631 r = hashmap_put(e->signal_data, &d->priority, d);
632 if (r < 0)
633 return r;
634
635 added = true;
636 }
637
638 ss_copy = d->sigset;
639 assert_se(sigaddset(&ss_copy, sig) >= 0);
640
641 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
642 if (r < 0) {
643 r = -errno;
644 goto fail;
645 }
646
647 d->sigset = ss_copy;
648
649 if (d->fd >= 0) {
650 if (ret)
651 *ret = d;
652 return 0;
653 }
654
655 d->fd = r;
656
657 ev.events = EPOLLIN;
658 ev.data.ptr = d;
659
660 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
661 if (r < 0) {
662 r = -errno;
663 goto fail;
664 }
665
666 if (ret)
667 *ret = d;
668
669 return 0;
670
671 fail:
672 if (added) {
673 d->fd = safe_close(d->fd);
674 hashmap_remove(e->signal_data, &d->priority);
675 free(d);
676 }
677
678 return r;
679 }
680
static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty that
         * way removes it. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {

                /* If all the mask is all-zero we can get rid of the structure */
                hashmap_remove(e->signal_data, &d->priority);
                /* nothing may be mid-dispatch from this fd when we close it */
                assert(!d->current);
                safe_close(d->fd);
                free(d);
                return;
        }

        assert(d->fd >= 0);

        /* Re-program the existing signalfd with the narrowed mask; best-effort */
        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}
709
static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        /* SIGCHLD is still needed while any child source is enabled */
        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        /* Still needed while an enabled signal source exists for it */
        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}
755
/* Detach a source from its event loop: remove it from every queue and lookup
 * table the loop keeps, without freeing the source itself. Safe to call on a
 * source that is already disconnected. */
static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                /* queues changed, the timerfd must be reprogrammed */
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        /* possibly drop the per-priority signalfd for this signal */
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, INT_TO_PTR(s->child.pid));
                        /* child sources are delivered via SIGCHLD; maybe release it */
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        /* Drop the loop reference a non-floating source holds; this may free the loop */
        if (!s->floating)
                sd_event_unref(event);
}
846
/* Fully destroy a source: detach it from its loop first, then free its memory. */
static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);
        free(s->description);
        free(s);
}
854
/* Add the source to, or remove it from, the loop's pending queue, and keep
 * the auxiliary per-type bookkeeping consistent. Returns 0 or a negative
 * errno if queue insertion fails (in which case the flag is rolled back). */
static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        /* exit sources use their own prioq and are never marked pending */
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                /* remember when it became pending, for FIFO ordering within a priority */
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                /* pending-ness participates in the time prioq ordering, hence reshuffle */
                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                /* no longer pending: forget it as the signal currently being dispatched */
                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        return 0;
}
898
899 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
900 sd_event_source *s;
901
902 assert(e);
903
904 s = new0(sd_event_source, 1);
905 if (!s)
906 return NULL;
907
908 s->n_ref = 1;
909 s->event = e;
910 s->floating = floating;
911 s->type = type;
912 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
913
914 if (!floating)
915 sd_event_ref(e);
916
917 LIST_PREPEND(sources, e->sources, s);
918 e->n_sources ++;
919
920 return s;
921 }
922
923 _public_ int sd_event_add_io(
924 sd_event *e,
925 sd_event_source **ret,
926 int fd,
927 uint32_t events,
928 sd_event_io_handler_t callback,
929 void *userdata) {
930
931 sd_event_source *s;
932 int r;
933
934 assert_return(e, -EINVAL);
935 assert_return(fd >= 0, -EBADF);
936 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
937 assert_return(callback, -EINVAL);
938 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
939 assert_return(!event_pid_changed(e), -ECHILD);
940
941 s = source_new(e, !ret, SOURCE_IO);
942 if (!s)
943 return -ENOMEM;
944
945 s->wakeup = WAKEUP_EVENT_SOURCE;
946 s->io.fd = fd;
947 s->io.events = events;
948 s->io.callback = callback;
949 s->userdata = userdata;
950 s->enabled = SD_EVENT_ON;
951
952 r = source_io_register(s, s->enabled, events);
953 if (r < 0) {
954 source_free(s);
955 return r;
956 }
957
958 if (ret)
959 *ret = s;
960
961 return 0;
962 }
963
964 static void initialize_perturb(sd_event *e) {
965 sd_id128_t bootid = {};
966
967 /* When we sleep for longer, we try to realign the wakeup to
968 the same time wihtin each minute/second/250ms, so that
969 events all across the system can be coalesced into a single
970 CPU wakeup. However, let's take some system-specific
971 randomness for this value, so that in a network of systems
972 with synced clocks timer events are distributed a
973 bit. Here, we calculate a perturbation usec offset from the
974 boot ID. */
975
976 if (_likely_(e->perturb != USEC_INFINITY))
977 return;
978
979 if (sd_id128_get_boot(&bootid) >= 0)
980 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
981 }
982
983 static int event_setup_timer_fd(
984 sd_event *e,
985 struct clock_data *d,
986 clockid_t clock) {
987
988 struct epoll_event ev = {};
989 int r, fd;
990
991 assert(e);
992 assert(d);
993
994 if (_likely_(d->fd >= 0))
995 return 0;
996
997 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
998 if (fd < 0)
999 return -errno;
1000
1001 ev.events = EPOLLIN;
1002 ev.data.ptr = d;
1003
1004 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1005 if (r < 0) {
1006 safe_close(fd);
1007 return -errno;
1008 }
1009
1010 d->fd = fd;
1011 return 0;
1012 }
1013
/* Default handler for time sources added without a callback: exits the loop
 * with the code stashed in userdata. */
static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
1019
/* Add a timer source on the given clock, firing once at 'usec' with the given
 * accuracy window (0 selects DEFAULT_ACCURACY_USEC). A NULL callback makes
 * the timer exit the loop when it fires. Lazily sets up the per-clock prioqs
 * and timerfd. Returns 0, -EOPNOTSUPP for unsupported clocks, or a negative
 * errno. */
_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        sd_event_source *s;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = time_exit_callback;

        type = clock_to_event_source_type(clock);
        assert_return(type >= 0, -EOPNOTSUPP);

        d = event_get_clock_data(e, type);
        assert(d);

        /* The two prioqs and the timerfd are created on first use of the clock */
        if (!d->earliest) {
                d->earliest = prioq_new(earliest_time_prioq_compare);
                if (!d->earliest)
                        return -ENOMEM;
        }

        if (!d->latest) {
                d->latest = prioq_new(latest_time_prioq_compare);
                if (!d->latest)
                        return -ENOMEM;
        }

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        /* queues change below, the timerfd must be reprogrammed */
        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                goto fail;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                goto fail;

        if (ret)
                *ret = s;

        return 0;

fail:
        source_free(s);
        return r;
}
1097
/* Default handler for signal sources added without a callback: exits the loop
 * with the code stashed in userdata. */
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}
1103
1104 _public_ int sd_event_add_signal(
1105 sd_event *e,
1106 sd_event_source **ret,
1107 int sig,
1108 sd_event_signal_handler_t callback,
1109 void *userdata) {
1110
1111 sd_event_source *s;
1112 struct signal_data *d;
1113 sigset_t ss;
1114 int r;
1115
1116 assert_return(e, -EINVAL);
1117 assert_return(sig > 0, -EINVAL);
1118 assert_return(sig < _NSIG, -EINVAL);
1119 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1120 assert_return(!event_pid_changed(e), -ECHILD);
1121
1122 if (!callback)
1123 callback = signal_exit_callback;
1124
1125 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1126 if (r < 0)
1127 return -errno;
1128
1129 if (!sigismember(&ss, sig))
1130 return -EBUSY;
1131
1132 if (!e->signal_sources) {
1133 e->signal_sources = new0(sd_event_source*, _NSIG);
1134 if (!e->signal_sources)
1135 return -ENOMEM;
1136 } else if (e->signal_sources[sig])
1137 return -EBUSY;
1138
1139 s = source_new(e, !ret, SOURCE_SIGNAL);
1140 if (!s)
1141 return -ENOMEM;
1142
1143 s->signal.sig = sig;
1144 s->signal.callback = callback;
1145 s->userdata = userdata;
1146 s->enabled = SD_EVENT_ON;
1147
1148 e->signal_sources[sig] = s;
1149
1150 r = event_make_signal_data(e, sig, &d);
1151 if (r < 0) {
1152 source_free(s);
1153 return r;
1154 }
1155
1156 /* Use the signal name as description for the event source by default */
1157 (void) sd_event_source_set_description(s, signal_to_string(sig));
1158
1159 if (ret)
1160 *ret = s;
1161
1162 return 0;
1163 }
1164
/* Add a child (SIGCHLD-driven) event source watching 'pid'. 'options' takes
 * waitid() flags (WEXITED/WSTOPPED/WCONTINUED). Only one source per pid is
 * allowed. The source starts enabled as ONESHOT. Returns 0, -EBUSY if the
 * pid is already watched, or another negative errno. */
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        sd_event_source *s;
        int r;

        assert_return(e, -EINVAL);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, INT_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, INT_TO_PTR(pid), s);
        if (r < 0) {
                source_free(s);
                return r;
        }

        /* Count the source as enabled before setting up SIGCHLD delivery, and
         * roll the counter back by hand on failure (source_free() would not,
         * since the source is not yet accounted as enabled there). */
        e->n_enabled_child_sources ++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                source_free(s);
                return r;
        }

        /* have the loop reap/inspect children on the next iteration */
        e->need_process_child = true;

        if (ret)
                *ret = s;

        return 0;
}
1223
1224 _public_ int sd_event_add_defer(
1225 sd_event *e,
1226 sd_event_source **ret,
1227 sd_event_handler_t callback,
1228 void *userdata) {
1229
1230 sd_event_source *s;
1231 int r;
1232
1233 assert_return(e, -EINVAL);
1234 assert_return(callback, -EINVAL);
1235 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1236 assert_return(!event_pid_changed(e), -ECHILD);
1237
1238 s = source_new(e, !ret, SOURCE_DEFER);
1239 if (!s)
1240 return -ENOMEM;
1241
1242 s->defer.callback = callback;
1243 s->userdata = userdata;
1244 s->enabled = SD_EVENT_ONESHOT;
1245
1246 r = source_set_pending(s, true);
1247 if (r < 0) {
1248 source_free(s);
1249 return r;
1250 }
1251
1252 if (ret)
1253 *ret = s;
1254
1255 return 0;
1256 }
1257
1258 _public_ int sd_event_add_post(
1259 sd_event *e,
1260 sd_event_source **ret,
1261 sd_event_handler_t callback,
1262 void *userdata) {
1263
1264 sd_event_source *s;
1265 int r;
1266
1267 assert_return(e, -EINVAL);
1268 assert_return(callback, -EINVAL);
1269 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1270 assert_return(!event_pid_changed(e), -ECHILD);
1271
1272 r = set_ensure_allocated(&e->post_sources, NULL);
1273 if (r < 0)
1274 return r;
1275
1276 s = source_new(e, !ret, SOURCE_POST);
1277 if (!s)
1278 return -ENOMEM;
1279
1280 s->post.callback = callback;
1281 s->userdata = userdata;
1282 s->enabled = SD_EVENT_ON;
1283
1284 r = set_put(e->post_sources, s);
1285 if (r < 0) {
1286 source_free(s);
1287 return r;
1288 }
1289
1290 if (ret)
1291 *ret = s;
1292
1293 return 0;
1294 }
1295
1296 _public_ int sd_event_add_exit(
1297 sd_event *e,
1298 sd_event_source **ret,
1299 sd_event_handler_t callback,
1300 void *userdata) {
1301
1302 sd_event_source *s;
1303 int r;
1304
1305 assert_return(e, -EINVAL);
1306 assert_return(callback, -EINVAL);
1307 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1308 assert_return(!event_pid_changed(e), -ECHILD);
1309
1310 if (!e->exit) {
1311 e->exit = prioq_new(exit_prioq_compare);
1312 if (!e->exit)
1313 return -ENOMEM;
1314 }
1315
1316 s = source_new(e, !ret, SOURCE_EXIT);
1317 if (!s)
1318 return -ENOMEM;
1319
1320 s->exit.callback = callback;
1321 s->userdata = userdata;
1322 s->exit.prioq_index = PRIOQ_IDX_NULL;
1323 s->enabled = SD_EVENT_ONESHOT;
1324
1325 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1326 if (r < 0) {
1327 source_free(s);
1328 return r;
1329 }
1330
1331 if (ret)
1332 *ret = s;
1333
1334 return 0;
1335 }
1336
/* Take an additional reference on an event source. Returns s, or NULL if s is NULL. */
_public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
        assert_return(s, NULL);

        assert(s->n_ref >= 1);
        s->n_ref++;

        return s;
}
1345
/* Drop one reference on an event source. A NULL argument is tolerated.
 * Always returns NULL so callers can write "s = sd_event_source_unref(s);". */
_public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {

        if (!s)
                return NULL;

        assert(s->n_ref >= 1);
        s->n_ref--;

        if (s->n_ref <= 0) {
                /* Here's a special hack: when we are called from a
                 * dispatch handler we won't free the event source
                 * immediately, but we will detach the fd from the
                 * epoll. This way it is safe for the caller to unref
                 * the event source and immediately close the fd, but
                 * we still retain a valid event source object after
                 * the callback. */

                if (s->dispatching) {
                        if (s->type == SOURCE_IO)
                                source_io_unregister(s);

                        source_disconnect(s);
                } else
                        source_free(s);
        }

        return NULL;
}
1374
/* Sets (or clears, when description is NULL) the free-form description
 * string used when logging about this event source. */
_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* free_and_strdup() releases any previous string and duplicates
         * the new one; it handles NULL as "unset". */
        return free_and_strdup(&s->description, description);
}
1381
/* Retrieves the description string previously set; returns -ENXIO when
 * none was ever set. The returned pointer stays owned by the source. */
_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(s->description, -ENXIO);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *description = s->description;
        return 0;
}
1391
/* Returns the event loop object this source is attached to (borrowed
 * reference, no ref count taken). */
_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}
1397
/* Returns whether the source currently has an event queued for
 * dispatching. Exit sources have no pending concept, hence -EDOM. */
_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}
1406
/* Returns the file descriptor an I/O event source watches; only valid
 * for SOURCE_IO sources (-EDOM otherwise). */
_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}
1414
/* Replaces the file descriptor watched by an I/O event source. If the
 * source is enabled, the new fd is registered with epoll before the old
 * one is dropped, and on failure the original fd/registration state is
 * restored, so the source is never left half-switched. */
_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                /* Disabled sources are not registered in epoll, so we can
                 * simply swap the fd. */
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                /* Register the new fd first ... */
                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        /* ... and roll back to the old fd if that fails. */
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                /* Only now drop the old fd from epoll; failure here is
                 * deliberately ignored (the fd may already be closed). */
                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}
1450
/* Returns the epoll event mask (EPOLLIN etc.) configured for an I/O
 * event source. */
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}
1460
/* Changes the epoll event mask of an I/O event source. The new mask is
 * registered with the kernel first and only committed to the source on
 * success, so a failure leaves the old configuration intact. */
_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;
        /* Any queued revents refer to the old mask; drop the pending state. */
        source_set_pending(s, false);

        return 0;
}
1485
/* Returns the events that actually triggered on an I/O source; only
 * meaningful while the source is pending (-ENODATA otherwise). */
_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}
1496
/* Returns the signal number a SOURCE_SIGNAL event source watches. */
_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}
1504
1505 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1506 assert_return(s, -EINVAL);
1507 assert_return(!event_pid_changed(s->event), -ECHILD);
1508
1509 return s->priority;
1510 }
1511
/* Changes the dispatch priority of an event source. Signal sources are
 * special: each priority level has its own signalfd, hence an enabled
 * signal source must be migrated from the old level's signalfd to the
 * new one, with rollback if that fails. Afterwards all priority queues
 * the source sits in are reshuffled to reflect the new key. */
_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
                struct signal_data *old, *d;

                /* Move us from the signalfd belonging to the old
                 * priority to the signalfd of the new priority */

                assert_se(old = hashmap_get(s->event->signal_data, &s->priority));

                /* Set the new priority first: event_make_signal_data()
                 * keys off s->priority. */
                s->priority = priority;

                r = event_make_signal_data(s->event, s->signal.sig, &d);
                if (r < 0) {
                        /* Migration failed, restore the old priority. */
                        s->priority = old->priority;
                        return r;
                }

                /* Stop receiving the signal on the old priority level. */
                event_unmask_signal_data(s->event, old, s->signal.sig);
        } else
                s->priority = priority;

        /* Re-key the source in whichever priority queues it occupies. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        if (s->type == SOURCE_EXIT)
                prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);

        return 0;
}
1553
/* Returns the enablement state (SD_EVENT_OFF/ON/ONESHOT) of the source
 * through *m. */
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(m, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *m = s->enabled;
        return 0;
}
1562
/* Changes the enablement state of an event source. Turning a source off
 * or on requires per-type bookkeeping: deregistering/registering with
 * epoll (I/O), reshuffling the clock priority queues (timers), joining/
 * leaving the per-priority signalfd (signals, SIGCHLD for child
 * sources), or reshuffling the exit queue. Registration failures roll
 * the state back to SD_EVENT_OFF. */
_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        /* Disabled timer sources sort to the end of the
                         * clock queues; reshuffle and request a rearm. */
                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        /* Possibly drop the per-priority signalfd if we
                         * were its last user. */
                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {
                switch (s->type) {

                case SOURCE_IO:
                        /* Register with epoll before committing, so a
                         * failure leaves us disabled. */
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:

                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                /* Roll back: disable again and GC the
                                 * partially set up signal data. */
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:

                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        /* Child sources are driven via SIGCHLD. */
                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        /* The enablement state participates in the pending/prepare queue
         * ordering, so re-key the source there too. */
        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1714
/* Returns the absolute expiry time (in usec, on the source's clock) of
 * a timer event source. */
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}
1724
/* Changes the absolute expiry time of a timer event source and re-keys
 * it in the clock's earliest/latest queues so the timerfd can be
 * rearmed on the next loop iteration. */
_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        s->time.next = usec;

        /* An already queued expiry refers to the old deadline; unqueue it. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1747
/* Returns the accuracy (maximum permissible delay, in usec) of a timer
 * event source. */
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}
1757
/* Changes the accuracy of a timer source; 0 selects the default
 * (DEFAULT_ACCURACY_USEC). Only the "latest" queue is keyed on
 * accuracy, hence only that queue needs reshuffling. */
_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        /* A queued expiry was computed from the old accuracy; unqueue it. */
        source_set_pending(s, false);

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}
1782
/* Returns the clockid_t (CLOCK_REALTIME, CLOCK_MONOTONIC, ...) that a
 * timer event source is based on. */
_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}
1792
/* Returns the PID a SOURCE_CHILD event source is watching. */
_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}
1802
/* Installs, replaces or (with NULL) removes a prepare callback, invoked
 * right before each poll. Sources with a prepare callback are tracked
 * in the event loop's prepare priority queue; membership is updated
 * here on the set/unset transitions. */
_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        /* Replacing one callback with another: queue membership is
         * unchanged, just swap the pointer. */
        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}
1834
/* Returns the opaque userdata pointer associated with the source. */
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}
1840
1841 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1842 void *ret;
1843
1844 assert_return(s, NULL);
1845
1846 ret = s->userdata;
1847 s->userdata = userdata;
1848
1849 return ret;
1850 }
1851
/* Picks a wake-up time in the window [a, b], preferring instants that
 * are system-wide synchronized (same spot within each minute, 10s, 1s,
 * or 250ms, offset by the per-boot perturbation value) so independent
 * event loops tend to wake together. Falls back to b if no shared spot
 * fits the window. Returns a value in [a, b].
 * NOTE(review): the minute-granularity stanza uses e->perturb without a
 * modulo, which presumably relies on initialize_perturb() keeping the
 * value below one minute — confirm against its definition. */
static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;

        /* Window too narrow to play games with, wake at the deadline. */
        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
          later times over earlier times.

          b) But if we have to wake up, then let's make sure to
          dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms step. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                /* Guard against underflow before stepping one minute back. */
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        /* No synchronized spot fits into [a, b]; take the latest time. */
        return b;
}
1929
1930 static int event_arm_timer(
1931 sd_event *e,
1932 struct clock_data *d) {
1933
1934 struct itimerspec its = {};
1935 sd_event_source *a, *b;
1936 usec_t t;
1937 int r;
1938
1939 assert(e);
1940 assert(d);
1941
1942 if (!d->needs_rearm)
1943 return 0;
1944 else
1945 d->needs_rearm = false;
1946
1947 a = prioq_peek(d->earliest);
1948 if (!a || a->enabled == SD_EVENT_OFF) {
1949
1950 if (d->fd < 0)
1951 return 0;
1952
1953 if (d->next == USEC_INFINITY)
1954 return 0;
1955
1956 /* disarm */
1957 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1958 if (r < 0)
1959 return r;
1960
1961 d->next = USEC_INFINITY;
1962 return 0;
1963 }
1964
1965 b = prioq_peek(d->latest);
1966 assert_se(b && b->enabled != SD_EVENT_OFF);
1967
1968 t = sleep_between(e, a->time.next, b->time.next + b->time.accuracy);
1969 if (d->next == t)
1970 return 0;
1971
1972 assert_se(d->fd >= 0);
1973
1974 if (t == 0) {
1975 /* We don' want to disarm here, just mean some time looooong ago. */
1976 its.it_value.tv_sec = 0;
1977 its.it_value.tv_nsec = 1;
1978 } else
1979 timespec_store(&its.it_value, t);
1980
1981 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1982 if (r < 0)
1983 return -errno;
1984
1985 d->next = t;
1986 return 0;
1987 }
1988
1989 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
1990 assert(e);
1991 assert(s);
1992 assert(s->type == SOURCE_IO);
1993
1994 /* If the event source was already pending, we just OR in the
1995 * new revents, otherwise we reset the value. The ORing is
1996 * necessary to handle EPOLLONESHOT events properly where
1997 * readability might happen independently of writability, and
1998 * we need to keep track of both */
1999
2000 if (s->pending)
2001 s->io.revents |= revents;
2002 else
2003 s->io.revents = revents;
2004
2005 return source_set_pending(s, true);
2006 }
2007
2008 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2009 uint64_t x;
2010 ssize_t ss;
2011
2012 assert(e);
2013 assert(fd >= 0);
2014
2015 assert_return(events == EPOLLIN, -EIO);
2016
2017 ss = read(fd, &x, sizeof(x));
2018 if (ss < 0) {
2019 if (errno == EAGAIN || errno == EINTR)
2020 return 0;
2021
2022 return -errno;
2023 }
2024
2025 if (_unlikely_(ss != sizeof(x)))
2026 return -EIO;
2027
2028 if (next)
2029 *next = USEC_INFINITY;
2030
2031 return 0;
2032 }
2033
2034 static int process_timer(
2035 sd_event *e,
2036 usec_t n,
2037 struct clock_data *d) {
2038
2039 sd_event_source *s;
2040 int r;
2041
2042 assert(e);
2043 assert(d);
2044
2045 for (;;) {
2046 s = prioq_peek(d->earliest);
2047 if (!s ||
2048 s->time.next > n ||
2049 s->enabled == SD_EVENT_OFF ||
2050 s->pending)
2051 break;
2052
2053 r = source_set_pending(s, true);
2054 if (r < 0)
2055 return r;
2056
2057 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2058 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2059 d->needs_rearm = true;
2060 }
2061
2062 return 0;
2063 }
2064
/* Polls (via waitid() with WNOHANG) every watched child process for a
 * state change and marks the matching child sources pending. Returns 0
 * on success, negative errno on waitid() failure. */
static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD even of a
          previous invocation and we don't want to maintain a
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                /* Already queued, or not interested right now: skip. */
                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                /* si_pid stays 0 when nothing happened (WNOHANG). */
                if (s->child.siginfo.si_pid != 0) {
                        bool zombie =
                                s->child.siginfo.si_code == CLD_EXITED ||
                                s->child.siginfo.si_code == CLD_KILLED ||
                                s->child.siginfo.si_code == CLD_DUMPED;

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}
2131
/* Dequeues at most one signal from the signalfd of one priority level
 * and marks the matching signal source pending. Returns > 0 when a
 * signal was read, 0 when the fd was drained or a source on this level
 * is still pending, negative errno on failure. */
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one, and
           SIGCHLD might be enqueued later we wouldn't know, but we
           might have higher priority children we care about hence we
           need to check that explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        /* EAGAIN: drained; EINTR: try again next time. */
                        if (errno == EAGAIN || errno == EINTR)
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(si.ssi_signo < _NSIG);

                read_one = true;

                /* A signal nobody subscribed to: drop it and keep reading. */
                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                /* Remember which source currently blocks this priority
                 * level from dequeuing further signals. */
                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}
2192
/* Invokes the callback of one event source, handling the per-type
 * bookkeeping around it: clearing the pending state, queueing post
 * sources, applying ONESHOT semantics, reaping dispatched children,
 * and disabling (or freeing) the source when the callback failed or
 * dropped its last reference. Returns 1 on success, negative on
 * internal errors (callback errors are swallowed after disabling). */
static int source_dispatch(sd_event_source *s) {
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* DEFER/EXIT sources stay "pending" until disabled, everything
         * else is unqueued before the callback runs. */
        if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        /* ONESHOT sources are switched off before the callback, so the
         * callback may re-enable them if it wants another shot. */
        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        /* While this flag is set, sd_event_source_unref() defers the
         * actual free until after the callback returns. */
        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = s->child.siginfo.si_code == CLD_EXITED ||
                         s->child.siginfo.si_code == CLD_KILLED ||
                         s->child.siginfo.si_code == CLD_DUMPED;

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0) {
                if (s->description)
                        log_debug_errno(r, "Event source '%s' returned error, disabling: %m", s->description);
                else
                        log_debug_errno(r, "Event source %p returned error, disabling: %m", s);
        }

        /* The callback may have dropped the last reference; finish the
         * deferred free now, or disable the source if it failed. */
        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}
2298
/* Runs the prepare callback of every enabled source that has one, at
 * most once per loop iteration (tracked via prepare_iteration).
 * Failing callbacks get their source disabled, not the loop aborted. */
static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                /* The prepare queue sorts sources not yet handled this
                 * iteration first, so peeking one we already ran (or a
                 * disabled one) means we are done. */
                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                /* Mark as handled *before* the callback and re-key, so
                 * we make progress even if the callback misbehaves. */
                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                /* See sd_event_source_unref() for the dispatching flag. */
                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0) {
                        if (s->description)
                                log_debug_errno(r, "Prepare callback of event source '%s' returned error, disabling: %m", s->description);
                        else
                                log_debug_errno(r, "Prepare callback of event source %p returned error, disabling: %m", s);
                }

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}
2337
2338 static int dispatch_exit(sd_event *e) {
2339 sd_event_source *p;
2340 int r;
2341
2342 assert(e);
2343
2344 p = prioq_peek(e->exit);
2345 if (!p || p->enabled == SD_EVENT_OFF) {
2346 e->state = SD_EVENT_FINISHED;
2347 return 0;
2348 }
2349
2350 sd_event_ref(e);
2351 e->iteration++;
2352 e->state = SD_EVENT_EXITING;
2353
2354 r = source_dispatch(p);
2355
2356 e->state = SD_EVENT_INITIAL;
2357 sd_event_unref(e);
2358
2359 return r;
2360 }
2361
2362 static sd_event_source* event_next_pending(sd_event *e) {
2363 sd_event_source *p;
2364
2365 assert(e);
2366
2367 p = prioq_peek(e->pending);
2368 if (!p)
2369 return NULL;
2370
2371 if (p->enabled == SD_EVENT_OFF)
2372 return NULL;
2373
2374 return p;
2375 }
2376
2377 static int arm_watchdog(sd_event *e) {
2378 struct itimerspec its = {};
2379 usec_t t;
2380 int r;
2381
2382 assert(e);
2383 assert(e->watchdog_fd >= 0);
2384
2385 t = sleep_between(e,
2386 e->watchdog_last + (e->watchdog_period / 2),
2387 e->watchdog_last + (e->watchdog_period * 3 / 4));
2388
2389 timespec_store(&its.it_value, t);
2390
2391 /* Make sure we never set the watchdog to 0, which tells the
2392 * kernel to disable it. */
2393 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2394 its.it_value.tv_nsec = 1;
2395
2396 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2397 if (r < 0)
2398 return -errno;
2399
2400 return 0;
2401 }
2402
2403 static int process_watchdog(sd_event *e) {
2404 assert(e);
2405
2406 if (!e->watchdog)
2407 return 0;
2408
2409 /* Don't notify watchdog too often */
2410 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2411 return 0;
2412
2413 sd_notify(false, "WATCHDOG=1");
2414 e->watchdog_last = e->timestamp.monotonic;
2415
2416 return arm_watchdog(e);
2417 }
2418
/* First stage of one loop iteration: runs prepare callbacks, rearms all
 * clock timerfds, and decides whether anything is already dispatchable.
 * Returns > 0 (state PENDING) when something can be dispatched right
 * away, 0 (state ARMED) when the caller should poll via
 * sd_event_wait(), negative errno on failure. */
_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        /* An exit was requested: skip preparation, go straight to the
         * pending path so dispatch_exit() runs next. */
        if (e->exit_requested)
                goto pending;

        e->iteration++;

        r = event_prepare(e);
        if (r < 0)
                return r;

        /* Rearm the timerfd of every clock we maintain. */
        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        /* Run a zero-timeout wait, which flushes whatever is already
         * queued on the fds and transitions the state accordingly. */
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}
2471
/* Second stage of a loop iteration: polls epoll (up to the given
 * timeout in usec, (uint64_t) -1 for infinity), routes each wake-up to
 * its handler by WakeupType, processes expired timers, watchdog and
 * child state changes. Returns > 0 (state PENDING) when an event source
 * became dispatchable, 0 (state INITIAL) when the timeout elapsed,
 * negative errno on failure. */
_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        /* Exit requested: report "pending" so dispatch_exit() runs. */
        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        /* One slot per source suffices; at least one even when empty. */
        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* Round the usec timeout up to full milliseconds for epoll. */
        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        /* Treat signal interruption like a wake-up, the
                         * dispatch stage will find nothing and return. */
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        /* Snapshot "now" on all clocks, used by the timer processing
         * below and exposed via sd_event_now(). */
        dual_timestamp_get(&e->timestamp);
        e->timestamp_boottime = now(CLOCK_BOOTTIME);

        for (i = 0; i < m; i++) {

                /* The watchdog fd is registered with a sentinel rather
                 * than a WakeupType-carrying object. */
                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        /* Queue all timer sources whose deadline passed, per clock. */
        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        /* Set by process_signal() when SIGCHLD might have fired. */
        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}
2579
2580 _public_ int sd_event_dispatch(sd_event *e) {
2581 sd_event_source *p;
2582 int r;
2583
2584 assert_return(e, -EINVAL);
2585 assert_return(!event_pid_changed(e), -ECHILD);
2586 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2587 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2588
2589 if (e->exit_requested)
2590 return dispatch_exit(e);
2591
2592 p = event_next_pending(e);
2593 if (p) {
2594 sd_event_ref(e);
2595
2596 e->state = SD_EVENT_RUNNING;
2597 r = source_dispatch(p);
2598 e->state = SD_EVENT_INITIAL;
2599
2600 sd_event_unref(e);
2601
2602 return r;
2603 }
2604
2605 e->state = SD_EVENT_INITIAL;
2606
2607 return 1;
2608 }
2609
2610 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2611 int r;
2612
2613 assert_return(e, -EINVAL);
2614 assert_return(!event_pid_changed(e), -ECHILD);
2615 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2616 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2617
2618 r = sd_event_prepare(e);
2619 if (r == 0)
2620 /* There was nothing? Then wait... */
2621 r = sd_event_wait(e, timeout);
2622
2623 if (r > 0) {
2624 /* There's something now, then let's dispatch it */
2625 r = sd_event_dispatch(e);
2626 if (r < 0)
2627 return r;
2628
2629 return 1;
2630 }
2631
2632 return r;
2633 }
2634
2635 _public_ int sd_event_loop(sd_event *e) {
2636 int r;
2637
2638 assert_return(e, -EINVAL);
2639 assert_return(!event_pid_changed(e), -ECHILD);
2640 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2641
2642 sd_event_ref(e);
2643
2644 while (e->state != SD_EVENT_FINISHED) {
2645 r = sd_event_run(e, (uint64_t) -1);
2646 if (r < 0)
2647 goto finish;
2648 }
2649
2650 r = e->exit_code;
2651
2652 finish:
2653 sd_event_unref(e);
2654 return r;
2655 }
2656
2657 _public_ int sd_event_get_fd(sd_event *e) {
2658
2659 assert_return(e, -EINVAL);
2660 assert_return(!event_pid_changed(e), -ECHILD);
2661
2662 return e->epoll_fd;
2663 }
2664
2665 _public_ int sd_event_get_state(sd_event *e) {
2666 assert_return(e, -EINVAL);
2667 assert_return(!event_pid_changed(e), -ECHILD);
2668
2669 return e->state;
2670 }
2671
2672 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2673 assert_return(e, -EINVAL);
2674 assert_return(code, -EINVAL);
2675 assert_return(!event_pid_changed(e), -ECHILD);
2676
2677 if (!e->exit_requested)
2678 return -ENODATA;
2679
2680 *code = e->exit_code;
2681 return 0;
2682 }
2683
2684 _public_ int sd_event_exit(sd_event *e, int code) {
2685 assert_return(e, -EINVAL);
2686 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2687 assert_return(!event_pid_changed(e), -ECHILD);
2688
2689 e->exit_requested = true;
2690 e->exit_code = code;
2691
2692 return 0;
2693 }
2694
2695 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2696 assert_return(e, -EINVAL);
2697 assert_return(usec, -EINVAL);
2698 assert_return(!event_pid_changed(e), -ECHILD);
2699
2700 if (!dual_timestamp_is_set(&e->timestamp)) {
2701 /* Implicitly fall back to now() if we never ran
2702 * before and thus have no cached time. */
2703 *usec = now(clock);
2704 return 1;
2705 }
2706
2707 switch (clock) {
2708
2709 case CLOCK_REALTIME:
2710 case CLOCK_REALTIME_ALARM:
2711 *usec = e->timestamp.realtime;
2712 break;
2713
2714 case CLOCK_MONOTONIC:
2715 *usec = e->timestamp.monotonic;
2716 break;
2717
2718 case CLOCK_BOOTTIME:
2719 case CLOCK_BOOTTIME_ALARM:
2720 *usec = e->timestamp_boottime;
2721 break;
2722 }
2723
2724 return 0;
2725 }
2726
2727 _public_ int sd_event_default(sd_event **ret) {
2728
2729 static thread_local sd_event *default_event = NULL;
2730 sd_event *e = NULL;
2731 int r;
2732
2733 if (!ret)
2734 return !!default_event;
2735
2736 if (default_event) {
2737 *ret = sd_event_ref(default_event);
2738 return 0;
2739 }
2740
2741 r = sd_event_new(&e);
2742 if (r < 0)
2743 return r;
2744
2745 e->default_event_ptr = &default_event;
2746 e->tid = gettid();
2747 default_event = e;
2748
2749 *ret = e;
2750 return 1;
2751 }
2752
2753 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2754 assert_return(e, -EINVAL);
2755 assert_return(tid, -EINVAL);
2756 assert_return(!event_pid_changed(e), -ECHILD);
2757
2758 if (e->tid != 0) {
2759 *tid = e->tid;
2760 return 0;
2761 }
2762
2763 return -ENXIO;
2764 }
2765
_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Enables/disables automatic WATCHDOG=1 pings to the service manager,
         * driven by a timerfd hooked into our epoll set. Returns the new
         * (boolean) watchdog state on success. */

        /* Already in the requested state? Nothing to do. */
        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev = {};

                /* Query WATCHDOG_USEC from the environment; r == 0 means
                 * watchdog is not requested by the manager, so we report
                 * that (or an error) back without enabling anything. */
                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                /* Program the timer for the next ping (half the period). */
                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                /* Register the timerfd with a sentinel pointer so the event
                 * loop can tell watchdog wakeups apart from event sources. */
                ev.events = EPOLLIN;
                ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                /* Disable: unhook and close the timerfd, ignoring EPOLL_CTL_DEL
                 * failures since we are tearing down anyway. */
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        /* Roll back the partially set up timerfd; e->watchdog stays false. */
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}
2817
2818 _public_ int sd_event_get_watchdog(sd_event *e) {
2819 assert_return(e, -EINVAL);
2820 assert_return(!event_pid_changed(e), -ECHILD);
2821
2822 return e->watchdog;
2823 }