1 /***
2 This file is part of systemd.
3
4 Copyright 2013 Lennart Poettering
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18 ***/
19
20 #include <sys/epoll.h>
21 #include <sys/timerfd.h>
22 #include <sys/wait.h>
23
24 #include "sd-daemon.h"
25 #include "sd-event.h"
26 #include "sd-id128.h"
27
28 #include "alloc-util.h"
29 #include "fd-util.h"
30 #include "hashmap.h"
31 #include "list.h"
32 #include "macro.h"
33 #include "missing.h"
34 #include "prioq.h"
35 #include "process-util.h"
36 #include "set.h"
37 #include "signal-util.h"
38 #include "string-table.h"
39 #include "string-util.h"
40 #include "time-util.h"
41 #include "util.h"
42
43 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
44
45 typedef enum EventSourceType {
46 SOURCE_IO,
47 SOURCE_TIME_REALTIME,
48 SOURCE_TIME_BOOTTIME,
49 SOURCE_TIME_MONOTONIC,
50 SOURCE_TIME_REALTIME_ALARM,
51 SOURCE_TIME_BOOTTIME_ALARM,
52 SOURCE_SIGNAL,
53 SOURCE_CHILD,
54 SOURCE_DEFER,
55 SOURCE_POST,
56 SOURCE_EXIT,
57 SOURCE_WATCHDOG,
58 _SOURCE_EVENT_SOURCE_TYPE_MAX,
59 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
60 } EventSourceType;
61
62 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
63 [SOURCE_IO] = "io",
64 [SOURCE_TIME_REALTIME] = "realtime",
65         [SOURCE_TIME_BOOTTIME] = "boottime",
66 [SOURCE_TIME_MONOTONIC] = "monotonic",
67 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
68 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
69 [SOURCE_SIGNAL] = "signal",
70 [SOURCE_CHILD] = "child",
71 [SOURCE_DEFER] = "defer",
72 [SOURCE_POST] = "post",
73 [SOURCE_EXIT] = "exit",
74 [SOURCE_WATCHDOG] = "watchdog",
75 };
76
77 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
78
79 /* All objects we use in epoll events start with this value, so that
80  * we know how to dispatch them */
81 typedef enum WakeupType {
82 WAKEUP_NONE,
83 WAKEUP_EVENT_SOURCE,
84 WAKEUP_CLOCK_DATA,
85 WAKEUP_SIGNAL_DATA,
86 _WAKEUP_TYPE_MAX,
87 _WAKEUP_TYPE_INVALID = -1,
88 } WakeupType;
89
90 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
91
92 struct sd_event_source {
93 WakeupType wakeup;
94
95 unsigned n_ref;
96
97 sd_event *event;
98 void *userdata;
99 sd_event_handler_t prepare;
100
101 char *description;
102
103 EventSourceType type:5;
104 int enabled:3;
105 bool pending:1;
106 bool dispatching:1;
107 bool floating:1;
108
109 int64_t priority;
110 unsigned pending_index;
111 unsigned prepare_index;
112 unsigned pending_iteration;
113 unsigned prepare_iteration;
114
115 LIST_FIELDS(sd_event_source, sources);
116
117 union {
118 struct {
119 sd_event_io_handler_t callback;
120 int fd;
121 uint32_t events;
122 uint32_t revents;
123 bool registered:1;
124 } io;
125 struct {
126 sd_event_time_handler_t callback;
127 usec_t next, accuracy;
128 unsigned earliest_index;
129 unsigned latest_index;
130 } time;
131 struct {
132 sd_event_signal_handler_t callback;
133 struct signalfd_siginfo siginfo;
134 int sig;
135 } signal;
136 struct {
137 sd_event_child_handler_t callback;
138 siginfo_t siginfo;
139 pid_t pid;
140 int options;
141 } child;
142 struct {
143 sd_event_handler_t callback;
144 } defer;
145 struct {
146 sd_event_handler_t callback;
147 } post;
148 struct {
149 sd_event_handler_t callback;
150 unsigned prioq_index;
151 } exit;
152 };
153 };
154
155 struct clock_data {
156 WakeupType wakeup;
157 int fd;
158
159         /* For all clocks we maintain two priority queues each: one
160          * ordered by the earliest time the events may be
161          * dispatched, and one ordered by the latest time by which
162          * they must have been dispatched. The range between the top
163          * entries in the two prioqs is the time window we can freely
164          * schedule wakeups in. */
165
166 Prioq *earliest;
167 Prioq *latest;
168 usec_t next;
169
170 bool needs_rearm:1;
171 };
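
/* To illustrate the window (hypothetical numbers): with two enabled timer
 * sources on one clock, next=100ms/accuracy=250ms and next=180ms/accuracy=50ms,
 * the earliest prioq peeks at 100ms and the latest prioq at 180ms+50ms=230ms.
 * The loop may therefore schedule its next wakeup anywhere in [100ms, 230ms]
 * without missing a deadline; sleep_between() below picks a coalescing-friendly
 * spot inside such a range. */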
172
173 struct signal_data {
174 WakeupType wakeup;
175
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a
178 * time. */
179
180 int fd;
181 int64_t priority;
182 sigset_t sigset;
183 sd_event_source *current;
184 };
185
186 struct sd_event {
187 unsigned n_ref;
188
189 int epoll_fd;
190 int watchdog_fd;
191
192 Prioq *pending;
193 Prioq *prepare;
194
195 /* timerfd_create() only supports these five clocks so far. We
196 * can add support for more clocks when the kernel learns to
197 * deal with them, too. */
198 struct clock_data realtime;
199 struct clock_data boottime;
200 struct clock_data monotonic;
201 struct clock_data realtime_alarm;
202 struct clock_data boottime_alarm;
203
204 usec_t perturb;
205
206 sd_event_source **signal_sources; /* indexed by signal number */
207 Hashmap *signal_data; /* indexed by priority */
208
209 Hashmap *child_sources;
210 unsigned n_enabled_child_sources;
211
212 Set *post_sources;
213
214 Prioq *exit;
215
216 pid_t original_pid;
217
218 unsigned iteration;
219 dual_timestamp timestamp;
220 usec_t timestamp_boottime;
221 int state;
222
223 bool exit_requested:1;
224 bool need_process_child:1;
225 bool watchdog:1;
226 bool profile_delays:1;
227
228 int exit_code;
229
230 pid_t tid;
231 sd_event **default_event_ptr;
232
233 usec_t watchdog_last, watchdog_period;
234
235 unsigned n_sources;
236
237 LIST_HEAD(sd_event_source, sources);
238
239 usec_t last_run, last_log;
240 unsigned delays[sizeof(usec_t) * 8];
241 };
242
243 static void source_disconnect(sd_event_source *s);
244
245 static int pending_prioq_compare(const void *a, const void *b) {
246 const sd_event_source *x = a, *y = b;
247
248 assert(x->pending);
249 assert(y->pending);
250
251 /* Enabled ones first */
252 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
253 return -1;
254 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
255 return 1;
256
257 /* Lower priority values first */
258 if (x->priority < y->priority)
259 return -1;
260 if (x->priority > y->priority)
261 return 1;
262
263 /* Older entries first */
264 if (x->pending_iteration < y->pending_iteration)
265 return -1;
266 if (x->pending_iteration > y->pending_iteration)
267 return 1;
268
269 return 0;
270 }
271
272 static int prepare_prioq_compare(const void *a, const void *b) {
273 const sd_event_source *x = a, *y = b;
274
275 assert(x->prepare);
276 assert(y->prepare);
277
278 /* Enabled ones first */
279 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
280 return -1;
281 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
282 return 1;
283
284 /* Move most recently prepared ones last, so that we can stop
285 * preparing as soon as we hit one that has already been
286 * prepared in the current iteration */
287 if (x->prepare_iteration < y->prepare_iteration)
288 return -1;
289 if (x->prepare_iteration > y->prepare_iteration)
290 return 1;
291
292 /* Lower priority values first */
293 if (x->priority < y->priority)
294 return -1;
295 if (x->priority > y->priority)
296 return 1;
297
298 return 0;
299 }
300
301 static int earliest_time_prioq_compare(const void *a, const void *b) {
302 const sd_event_source *x = a, *y = b;
303
304 assert(EVENT_SOURCE_IS_TIME(x->type));
305 assert(x->type == y->type);
306
307 /* Enabled ones first */
308 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
309 return -1;
310 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
311 return 1;
312
313 /* Move the pending ones to the end */
314 if (!x->pending && y->pending)
315 return -1;
316 if (x->pending && !y->pending)
317 return 1;
318
319 /* Order by time */
320 if (x->time.next < y->time.next)
321 return -1;
322 if (x->time.next > y->time.next)
323 return 1;
324
325 return 0;
326 }
327
328 static usec_t time_event_source_latest(const sd_event_source *s) {
329 return usec_add(s->time.next, s->time.accuracy);
330 }
331
332 static int latest_time_prioq_compare(const void *a, const void *b) {
333 const sd_event_source *x = a, *y = b;
334
335 assert(EVENT_SOURCE_IS_TIME(x->type));
336 assert(x->type == y->type);
337
338 /* Enabled ones first */
339 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
340 return -1;
341 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
342 return 1;
343
344 /* Move the pending ones to the end */
345 if (!x->pending && y->pending)
346 return -1;
347 if (x->pending && !y->pending)
348 return 1;
349
350 /* Order by time */
351 if (time_event_source_latest(x) < time_event_source_latest(y))
352 return -1;
353 if (time_event_source_latest(x) > time_event_source_latest(y))
354 return 1;
355
356 return 0;
357 }
358
359 static int exit_prioq_compare(const void *a, const void *b) {
360 const sd_event_source *x = a, *y = b;
361
362 assert(x->type == SOURCE_EXIT);
363 assert(y->type == SOURCE_EXIT);
364
365 /* Enabled ones first */
366 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
367 return -1;
368 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
369 return 1;
370
371 /* Lower priority values first */
372 if (x->priority < y->priority)
373 return -1;
374 if (x->priority > y->priority)
375 return 1;
376
377 return 0;
378 }
379
380 static void free_clock_data(struct clock_data *d) {
381 assert(d);
382 assert(d->wakeup == WAKEUP_CLOCK_DATA);
383
384 safe_close(d->fd);
385 prioq_free(d->earliest);
386 prioq_free(d->latest);
387 }
388
389 static void event_free(sd_event *e) {
390 sd_event_source *s;
391
392 assert(e);
393
394 while ((s = e->sources)) {
395 assert(s->floating);
396 source_disconnect(s);
397 sd_event_source_unref(s);
398 }
399
400 assert(e->n_sources == 0);
401
402 if (e->default_event_ptr)
403 *(e->default_event_ptr) = NULL;
404
405 safe_close(e->epoll_fd);
406 safe_close(e->watchdog_fd);
407
408 free_clock_data(&e->realtime);
409 free_clock_data(&e->boottime);
410 free_clock_data(&e->monotonic);
411 free_clock_data(&e->realtime_alarm);
412 free_clock_data(&e->boottime_alarm);
413
414 prioq_free(e->pending);
415 prioq_free(e->prepare);
416 prioq_free(e->exit);
417
418 free(e->signal_sources);
419 hashmap_free(e->signal_data);
420
421 hashmap_free(e->child_sources);
422 set_free(e->post_sources);
423 free(e);
424 }
425
426 _public_ int sd_event_new(sd_event** ret) {
427 sd_event *e;
428 int r;
429
430 assert_return(ret, -EINVAL);
431
432 e = new0(sd_event, 1);
433 if (!e)
434 return -ENOMEM;
435
436 e->n_ref = 1;
437 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
438 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
439 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
440 e->original_pid = getpid();
441 e->perturb = USEC_INFINITY;
442
443 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
444 if (r < 0)
445 goto fail;
446
447 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
448 if (e->epoll_fd < 0) {
449 r = -errno;
450 goto fail;
451 }
452
453 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
454 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
455 e->profile_delays = true;
456 }
457
458 *ret = e;
459 return 0;
460
461 fail:
462 event_free(e);
463 return r;
464 }
465
466 _public_ sd_event* sd_event_ref(sd_event *e) {
467
468 if (!e)
469 return NULL;
470
471 assert(e->n_ref >= 1);
472 e->n_ref++;
473
474 return e;
475 }
476
477 _public_ sd_event* sd_event_unref(sd_event *e) {
478
479 if (!e)
480 return NULL;
481
482 assert(e->n_ref >= 1);
483 e->n_ref--;
484
485 if (e->n_ref <= 0)
486 event_free(e);
487
488 return NULL;
489 }
490
491 static bool event_pid_changed(sd_event *e) {
492 assert(e);
493
494 /* We don't support people creating an event loop and keeping
495 * it around over a fork(). Let's complain. */
496
497 return e->original_pid != getpid();
498 }
499
500 static void source_io_unregister(sd_event_source *s) {
501 int r;
502
503 assert(s);
504 assert(s->type == SOURCE_IO);
505
506 if (event_pid_changed(s->event))
507 return;
508
509 if (!s->io.registered)
510 return;
511
512 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
513 if (r < 0)
514 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
515 strna(s->description), event_source_type_to_string(s->type));
516
517 s->io.registered = false;
518 }
519
520 static int source_io_register(
521 sd_event_source *s,
522 int enabled,
523 uint32_t events) {
524
525 struct epoll_event ev = {};
526 int r;
527
528 assert(s);
529 assert(s->type == SOURCE_IO);
530 assert(enabled != SD_EVENT_OFF);
531
532 ev.events = events;
533 ev.data.ptr = s;
534
535 if (enabled == SD_EVENT_ONESHOT)
536 ev.events |= EPOLLONESHOT;
537
538 if (s->io.registered)
539 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
540 else
541 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
542 if (r < 0)
543 return -errno;
544
545 s->io.registered = true;
546
547 return 0;
548 }
549
550 static clockid_t event_source_type_to_clock(EventSourceType t) {
551
552 switch (t) {
553
554 case SOURCE_TIME_REALTIME:
555 return CLOCK_REALTIME;
556
557 case SOURCE_TIME_BOOTTIME:
558 return CLOCK_BOOTTIME;
559
560 case SOURCE_TIME_MONOTONIC:
561 return CLOCK_MONOTONIC;
562
563 case SOURCE_TIME_REALTIME_ALARM:
564 return CLOCK_REALTIME_ALARM;
565
566 case SOURCE_TIME_BOOTTIME_ALARM:
567 return CLOCK_BOOTTIME_ALARM;
568
569 default:
570 return (clockid_t) -1;
571 }
572 }
573
574 static EventSourceType clock_to_event_source_type(clockid_t clock) {
575
576 switch (clock) {
577
578 case CLOCK_REALTIME:
579 return SOURCE_TIME_REALTIME;
580
581 case CLOCK_BOOTTIME:
582 return SOURCE_TIME_BOOTTIME;
583
584 case CLOCK_MONOTONIC:
585 return SOURCE_TIME_MONOTONIC;
586
587 case CLOCK_REALTIME_ALARM:
588 return SOURCE_TIME_REALTIME_ALARM;
589
590 case CLOCK_BOOTTIME_ALARM:
591 return SOURCE_TIME_BOOTTIME_ALARM;
592
593 default:
594 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
595 }
596 }
597
598 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
599 assert(e);
600
601 switch (t) {
602
603 case SOURCE_TIME_REALTIME:
604 return &e->realtime;
605
606 case SOURCE_TIME_BOOTTIME:
607 return &e->boottime;
608
609 case SOURCE_TIME_MONOTONIC:
610 return &e->monotonic;
611
612 case SOURCE_TIME_REALTIME_ALARM:
613 return &e->realtime_alarm;
614
615 case SOURCE_TIME_BOOTTIME_ALARM:
616 return &e->boottime_alarm;
617
618 default:
619 return NULL;
620 }
621 }
622
623 static int event_make_signal_data(
624 sd_event *e,
625 int sig,
626 struct signal_data **ret) {
627
628 struct epoll_event ev = {};
629 struct signal_data *d;
630 bool added = false;
631 sigset_t ss_copy;
632 int64_t priority;
633 int r;
634
635 assert(e);
636
637 if (event_pid_changed(e))
638 return -ECHILD;
639
640 if (e->signal_sources && e->signal_sources[sig])
641 priority = e->signal_sources[sig]->priority;
642 else
643 priority = 0;
644
645 d = hashmap_get(e->signal_data, &priority);
646 if (d) {
647 if (sigismember(&d->sigset, sig) > 0) {
648 if (ret)
649 *ret = d;
650 return 0;
651 }
652 } else {
653 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
654 if (r < 0)
655 return r;
656
657 d = new0(struct signal_data, 1);
658 if (!d)
659 return -ENOMEM;
660
661 d->wakeup = WAKEUP_SIGNAL_DATA;
662 d->fd = -1;
663 d->priority = priority;
664
665 r = hashmap_put(e->signal_data, &d->priority, d);
666 if (r < 0) {
667 free(d);
668 return r;
669 }
670
671 added = true;
672 }
673
674 ss_copy = d->sigset;
675 assert_se(sigaddset(&ss_copy, sig) >= 0);
676
677 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
678 if (r < 0) {
679 r = -errno;
680 goto fail;
681 }
682
683 d->sigset = ss_copy;
684
685 if (d->fd >= 0) {
686 if (ret)
687 *ret = d;
688 return 0;
689 }
690
691 d->fd = r;
692
693 ev.events = EPOLLIN;
694 ev.data.ptr = d;
695
696 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
697 if (r < 0) {
698 r = -errno;
699 goto fail;
700 }
701
702 if (ret)
703 *ret = d;
704
705 return 0;
706
707 fail:
708 if (added) {
709 d->fd = safe_close(d->fd);
710 hashmap_remove(e->signal_data, &d->priority);
711 free(d);
712 }
713
714 return r;
715 }
716
717 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
718 assert(e);
719 assert(d);
720
721         /* Turns off the specified signal in the signal data
722          * object. If the signal mask of the object becomes empty
723          * that way, the object is removed as well. */
724
725 if (sigismember(&d->sigset, sig) == 0)
726 return;
727
728 assert_se(sigdelset(&d->sigset, sig) >= 0);
729
730 if (sigisemptyset(&d->sigset)) {
731
732                 /* If the mask is all-zero, we can get rid of the structure */
733 hashmap_remove(e->signal_data, &d->priority);
734 assert(!d->current);
735 safe_close(d->fd);
736 free(d);
737 return;
738 }
739
740 assert(d->fd >= 0);
741
742 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
743 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
744 }
745
746 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
747 struct signal_data *d;
748 static const int64_t zero_priority = 0;
749
750 assert(e);
751
752 /* Rechecks if the specified signal is still something we are
753 * interested in. If not, we'll unmask it, and possibly drop
754 * the signalfd for it. */
755
756 if (sig == SIGCHLD &&
757 e->n_enabled_child_sources > 0)
758 return;
759
760 if (e->signal_sources &&
761 e->signal_sources[sig] &&
762 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
763 return;
764
765 /*
766 * The specified signal might be enabled in three different queues:
767 *
768 * 1) the one that belongs to the priority passed (if it is non-NULL)
769 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
770 * 3) the 0 priority (to cover the SIGCHLD case)
771 *
772 * Hence, let's remove it from all three here.
773 */
774
775 if (priority) {
776 d = hashmap_get(e->signal_data, priority);
777 if (d)
778 event_unmask_signal_data(e, d, sig);
779 }
780
781 if (e->signal_sources && e->signal_sources[sig]) {
782 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
783 if (d)
784 event_unmask_signal_data(e, d, sig);
785 }
786
787 d = hashmap_get(e->signal_data, &zero_priority);
788 if (d)
789 event_unmask_signal_data(e, d, sig);
790 }
791
792 static void source_disconnect(sd_event_source *s) {
793 sd_event *event;
794
795 assert(s);
796
797 if (!s->event)
798 return;
799
800 assert(s->event->n_sources > 0);
801
802 switch (s->type) {
803
804 case SOURCE_IO:
805 if (s->io.fd >= 0)
806 source_io_unregister(s);
807
808 break;
809
810 case SOURCE_TIME_REALTIME:
811 case SOURCE_TIME_BOOTTIME:
812 case SOURCE_TIME_MONOTONIC:
813 case SOURCE_TIME_REALTIME_ALARM:
814 case SOURCE_TIME_BOOTTIME_ALARM: {
815 struct clock_data *d;
816
817 d = event_get_clock_data(s->event, s->type);
818 assert(d);
819
820 prioq_remove(d->earliest, s, &s->time.earliest_index);
821 prioq_remove(d->latest, s, &s->time.latest_index);
822 d->needs_rearm = true;
823 break;
824 }
825
826 case SOURCE_SIGNAL:
827 if (s->signal.sig > 0) {
828
829 if (s->event->signal_sources)
830 s->event->signal_sources[s->signal.sig] = NULL;
831
832 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
833 }
834
835 break;
836
837 case SOURCE_CHILD:
838 if (s->child.pid > 0) {
839 if (s->enabled != SD_EVENT_OFF) {
840 assert(s->event->n_enabled_child_sources > 0);
841 s->event->n_enabled_child_sources--;
842 }
843
844 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
845 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
846 }
847
848 break;
849
850 case SOURCE_DEFER:
851 /* nothing */
852 break;
853
854 case SOURCE_POST:
855 set_remove(s->event->post_sources, s);
856 break;
857
858 case SOURCE_EXIT:
859 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
860 break;
861
862 default:
863 assert_not_reached("Wut? I shouldn't exist.");
864 }
865
866 if (s->pending)
867 prioq_remove(s->event->pending, s, &s->pending_index);
868
869 if (s->prepare)
870 prioq_remove(s->event->prepare, s, &s->prepare_index);
871
872 event = s->event;
873
874 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
875 s->event = NULL;
876 LIST_REMOVE(sources, event->sources, s);
877 event->n_sources--;
878
879 if (!s->floating)
880 sd_event_unref(event);
881 }
882
883 static void source_free(sd_event_source *s) {
884 assert(s);
885
886 source_disconnect(s);
887 free(s->description);
888 free(s);
889 }
890
891 static int source_set_pending(sd_event_source *s, bool b) {
892 int r;
893
894 assert(s);
895 assert(s->type != SOURCE_EXIT);
896
897 if (s->pending == b)
898 return 0;
899
900 s->pending = b;
901
902 if (b) {
903 s->pending_iteration = s->event->iteration;
904
905 r = prioq_put(s->event->pending, s, &s->pending_index);
906 if (r < 0) {
907 s->pending = false;
908 return r;
909 }
910 } else
911 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
912
913 if (EVENT_SOURCE_IS_TIME(s->type)) {
914 struct clock_data *d;
915
916 d = event_get_clock_data(s->event, s->type);
917 assert(d);
918
919 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
920 prioq_reshuffle(d->latest, s, &s->time.latest_index);
921 d->needs_rearm = true;
922 }
923
924 if (s->type == SOURCE_SIGNAL && !b) {
925 struct signal_data *d;
926
927 d = hashmap_get(s->event->signal_data, &s->priority);
928 if (d && d->current == s)
929 d->current = NULL;
930 }
931
932 return 0;
933 }
934
935 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
936 sd_event_source *s;
937
938 assert(e);
939
940 s = new0(sd_event_source, 1);
941 if (!s)
942 return NULL;
943
944 s->n_ref = 1;
945 s->event = e;
946 s->floating = floating;
947 s->type = type;
948 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
949
950 if (!floating)
951 sd_event_ref(e);
952
953 LIST_PREPEND(sources, e->sources, s);
954         e->n_sources++;
955
956 return s;
957 }
958
959 _public_ int sd_event_add_io(
960 sd_event *e,
961 sd_event_source **ret,
962 int fd,
963 uint32_t events,
964 sd_event_io_handler_t callback,
965 void *userdata) {
966
967 sd_event_source *s;
968 int r;
969
970 assert_return(e, -EINVAL);
971 assert_return(fd >= 0, -EBADF);
972 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
973 assert_return(callback, -EINVAL);
974 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
975 assert_return(!event_pid_changed(e), -ECHILD);
976
977 s = source_new(e, !ret, SOURCE_IO);
978 if (!s)
979 return -ENOMEM;
980
981 s->wakeup = WAKEUP_EVENT_SOURCE;
982 s->io.fd = fd;
983 s->io.events = events;
984 s->io.callback = callback;
985 s->userdata = userdata;
986 s->enabled = SD_EVENT_ON;
987
988 r = source_io_register(s, s->enabled, events);
989 if (r < 0) {
990 source_free(s);
991 return r;
992 }
993
994 if (ret)
995 *ret = s;
996
997 return 0;
998 }
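
/* A minimal usage sketch (hypothetical caller code, error handling
 * abbreviated): watch a socket fd for readability.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *
 *             if (read(fd, buf, sizeof(buf)) < 0 && errno != EAGAIN)
 *                     return -errno;
 *             return 0;
 *     }
 *
 *     sd_event *e = NULL;
 *     assert_se(sd_event_new(&e) >= 0);
 *     assert_se(sd_event_add_io(e, NULL, fd, EPOLLIN, on_io, NULL) >= 0);
 *
 * Note that passing NULL for the "ret" parameter makes the source floating,
 * i.e. owned by the event loop itself (see source_new() above). */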
999
1000 static void initialize_perturb(sd_event *e) {
1001 sd_id128_t bootid = {};
1002
1003 /* When we sleep for longer, we try to realign the wakeup to
1004            the same time within each minute/second/250ms, so that
1005 events all across the system can be coalesced into a single
1006 CPU wakeup. However, let's take some system-specific
1007 randomness for this value, so that in a network of systems
1008 with synced clocks timer events are distributed a
1009 bit. Here, we calculate a perturbation usec offset from the
1010 boot ID. */
1011
1012 if (_likely_(e->perturb != USEC_INFINITY))
1013 return;
1014
1015 if (sd_id128_get_boot(&bootid) >= 0)
1016 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1017 }
1018
1019 static int event_setup_timer_fd(
1020 sd_event *e,
1021 struct clock_data *d,
1022 clockid_t clock) {
1023
1024 struct epoll_event ev = {};
1025 int r, fd;
1026
1027 assert(e);
1028 assert(d);
1029
1030 if (_likely_(d->fd >= 0))
1031 return 0;
1032
1033 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1034 if (fd < 0)
1035 return -errno;
1036
1037 ev.events = EPOLLIN;
1038 ev.data.ptr = d;
1039
1040 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1041 if (r < 0) {
1042 safe_close(fd);
1043 return -errno;
1044 }
1045
1046 d->fd = fd;
1047 return 0;
1048 }
1049
1050 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1051 assert(s);
1052
1053 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1054 }
1055
1056 _public_ int sd_event_add_time(
1057 sd_event *e,
1058 sd_event_source **ret,
1059 clockid_t clock,
1060 uint64_t usec,
1061 uint64_t accuracy,
1062 sd_event_time_handler_t callback,
1063 void *userdata) {
1064
1065 EventSourceType type;
1066 sd_event_source *s;
1067 struct clock_data *d;
1068 int r;
1069
1070 assert_return(e, -EINVAL);
1071 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1072 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1073 assert_return(!event_pid_changed(e), -ECHILD);
1074
1075 if (!callback)
1076 callback = time_exit_callback;
1077
1078 type = clock_to_event_source_type(clock);
1079 assert_return(type >= 0, -EOPNOTSUPP);
1080
1081 d = event_get_clock_data(e, type);
1082 assert(d);
1083
1084 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1085 if (r < 0)
1086 return r;
1087
1088 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1089 if (r < 0)
1090 return r;
1091
1092 if (d->fd < 0) {
1093 r = event_setup_timer_fd(e, d, clock);
1094 if (r < 0)
1095 return r;
1096 }
1097
1098 s = source_new(e, !ret, type);
1099 if (!s)
1100 return -ENOMEM;
1101
1102 s->time.next = usec;
1103 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1104 s->time.callback = callback;
1105 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1106 s->userdata = userdata;
1107 s->enabled = SD_EVENT_ONESHOT;
1108
1109 d->needs_rearm = true;
1110
1111 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1112 if (r < 0)
1113 goto fail;
1114
1115 r = prioq_put(d->latest, s, &s->time.latest_index);
1116 if (r < 0)
1117 goto fail;
1118
1119 if (ret)
1120 *ret = s;
1121
1122 return 0;
1123
1124 fail:
1125 source_free(s);
1126 return r;
1127 }
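
/* A minimal usage sketch (hypothetical caller code): fire once, roughly five
 * seconds from now on the monotonic clock. An accuracy of 0 is replaced by
 * DEFAULT_ACCURACY_USEC (250ms) above, which tells the loop how far it may
 * delay the wakeup for coalescing.
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return 0;
 *     }
 *
 *     uint64_t now;
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &now) >= 0);
 *     assert_se(sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                 now + 5 * USEC_PER_SEC, 0, on_time, NULL) >= 0);
 */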
1128
1129 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1130 assert(s);
1131
1132 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1133 }
1134
1135 _public_ int sd_event_add_signal(
1136 sd_event *e,
1137 sd_event_source **ret,
1138 int sig,
1139 sd_event_signal_handler_t callback,
1140 void *userdata) {
1141
1142 sd_event_source *s;
1143 struct signal_data *d;
1144 sigset_t ss;
1145 int r;
1146
1147 assert_return(e, -EINVAL);
1148 assert_return(sig > 0, -EINVAL);
1149 assert_return(sig < _NSIG, -EINVAL);
1150 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1151 assert_return(!event_pid_changed(e), -ECHILD);
1152
1153 if (!callback)
1154 callback = signal_exit_callback;
1155
1156 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1157 if (r != 0)
1158 return -r;
1159
1160 if (!sigismember(&ss, sig))
1161 return -EBUSY;
1162
1163 if (!e->signal_sources) {
1164 e->signal_sources = new0(sd_event_source*, _NSIG);
1165 if (!e->signal_sources)
1166 return -ENOMEM;
1167 } else if (e->signal_sources[sig])
1168 return -EBUSY;
1169
1170 s = source_new(e, !ret, SOURCE_SIGNAL);
1171 if (!s)
1172 return -ENOMEM;
1173
1174 s->signal.sig = sig;
1175 s->signal.callback = callback;
1176 s->userdata = userdata;
1177 s->enabled = SD_EVENT_ON;
1178
1179 e->signal_sources[sig] = s;
1180
1181 r = event_make_signal_data(e, sig, &d);
1182 if (r < 0) {
1183 source_free(s);
1184 return r;
1185 }
1186
1187 /* Use the signal name as description for the event source by default */
1188 (void) sd_event_source_set_description(s, signal_to_string(sig));
1189
1190 if (ret)
1191 *ret = s;
1192
1193 return 0;
1194 }
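
/* Note the pthread_sigmask() check above: the caller must have blocked the
 * signal before adding a source for it, otherwise -EBUSY is returned, since a
 * signalfd only sees signals that are not delivered the traditional way. A
 * minimal sketch (hypothetical caller code):
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGTERM) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);
 *
 *     assert_se(sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL) >= 0);
 *
 * With a NULL callback this falls back to signal_exit_callback() above, i.e.
 * the loop exits when SIGTERM arrives. */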
1195
1196 _public_ int sd_event_add_child(
1197 sd_event *e,
1198 sd_event_source **ret,
1199 pid_t pid,
1200 int options,
1201 sd_event_child_handler_t callback,
1202 void *userdata) {
1203
1204 sd_event_source *s;
1205 int r;
1206
1207 assert_return(e, -EINVAL);
1208 assert_return(pid > 1, -EINVAL);
1209 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1210 assert_return(options != 0, -EINVAL);
1211 assert_return(callback, -EINVAL);
1212 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1213 assert_return(!event_pid_changed(e), -ECHILD);
1214
1215 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1216 if (r < 0)
1217 return r;
1218
1219 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1220 return -EBUSY;
1221
1222 s = source_new(e, !ret, SOURCE_CHILD);
1223 if (!s)
1224 return -ENOMEM;
1225
1226 s->child.pid = pid;
1227 s->child.options = options;
1228 s->child.callback = callback;
1229 s->userdata = userdata;
1230 s->enabled = SD_EVENT_ONESHOT;
1231
1232 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1233 if (r < 0) {
1234 source_free(s);
1235 return r;
1236 }
1237
1238         e->n_enabled_child_sources++;
1239
1240 r = event_make_signal_data(e, SIGCHLD, NULL);
1241 if (r < 0) {
1242 e->n_enabled_child_sources--;
1243 source_free(s);
1244 return r;
1245 }
1246
1247 e->need_process_child = true;
1248
1249 if (ret)
1250 *ret = s;
1251
1252 return 0;
1253 }
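
/* A minimal sketch (hypothetical caller code): analogous to
 * sd_event_add_signal(), SIGCHLD must be blocked by the caller for the
 * underlying signalfd to see it.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_info("child exited with status %i", si->si_status);
 *             return 0;
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *     assert_se(sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL) >= 0);
 */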
1254
1255 _public_ int sd_event_add_defer(
1256 sd_event *e,
1257 sd_event_source **ret,
1258 sd_event_handler_t callback,
1259 void *userdata) {
1260
1261 sd_event_source *s;
1262 int r;
1263
1264 assert_return(e, -EINVAL);
1265 assert_return(callback, -EINVAL);
1266 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1267 assert_return(!event_pid_changed(e), -ECHILD);
1268
1269 s = source_new(e, !ret, SOURCE_DEFER);
1270 if (!s)
1271 return -ENOMEM;
1272
1273 s->defer.callback = callback;
1274 s->userdata = userdata;
1275 s->enabled = SD_EVENT_ONESHOT;
1276
1277 r = source_set_pending(s, true);
1278 if (r < 0) {
1279 source_free(s);
1280 return r;
1281 }
1282
1283 if (ret)
1284 *ret = s;
1285
1286 return 0;
1287 }
1288
1289 _public_ int sd_event_add_post(
1290 sd_event *e,
1291 sd_event_source **ret,
1292 sd_event_handler_t callback,
1293 void *userdata) {
1294
1295 sd_event_source *s;
1296 int r;
1297
1298 assert_return(e, -EINVAL);
1299 assert_return(callback, -EINVAL);
1300 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1301 assert_return(!event_pid_changed(e), -ECHILD);
1302
1303 r = set_ensure_allocated(&e->post_sources, NULL);
1304 if (r < 0)
1305 return r;
1306
1307 s = source_new(e, !ret, SOURCE_POST);
1308 if (!s)
1309 return -ENOMEM;
1310
1311 s->post.callback = callback;
1312 s->userdata = userdata;
1313 s->enabled = SD_EVENT_ON;
1314
1315 r = set_put(e->post_sources, s);
1316 if (r < 0) {
1317 source_free(s);
1318 return r;
1319 }
1320
1321 if (ret)
1322 *ret = s;
1323
1324 return 0;
1325 }
1326
1327 _public_ int sd_event_add_exit(
1328 sd_event *e,
1329 sd_event_source **ret,
1330 sd_event_handler_t callback,
1331 void *userdata) {
1332
1333 sd_event_source *s;
1334 int r;
1335
1336 assert_return(e, -EINVAL);
1337 assert_return(callback, -EINVAL);
1338 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1339 assert_return(!event_pid_changed(e), -ECHILD);
1340
1341 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1342 if (r < 0)
1343 return r;
1344
1345 s = source_new(e, !ret, SOURCE_EXIT);
1346 if (!s)
1347 return -ENOMEM;
1348
1349 s->exit.callback = callback;
1350 s->userdata = userdata;
1351 s->exit.prioq_index = PRIOQ_IDX_NULL;
1352 s->enabled = SD_EVENT_ONESHOT;
1353
1354 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1355 if (r < 0) {
1356 source_free(s);
1357 return r;
1358 }
1359
1360 if (ret)
1361 *ret = s;
1362
1363 return 0;
1364 }
1365
1366 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1367
1368 if (!s)
1369 return NULL;
1370
1371 assert(s->n_ref >= 1);
1372 s->n_ref++;
1373
1374 return s;
1375 }
1376
1377 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1378
1379 if (!s)
1380 return NULL;
1381
1382 assert(s->n_ref >= 1);
1383 s->n_ref--;
1384
1385 if (s->n_ref <= 0) {
1386 /* Here's a special hack: when we are called from a
1387 * dispatch handler we won't free the event source
1388 * immediately, but we will detach the fd from the
1389 * epoll. This way it is safe for the caller to unref
1390 * the event source and immediately close the fd, but
1391 * we still retain a valid event source object after
1392 * the callback. */
1393
1394 if (s->dispatching) {
1395 if (s->type == SOURCE_IO)
1396 source_io_unregister(s);
1397
1398 source_disconnect(s);
1399 } else
1400 source_free(s);
1401 }
1402
1403 return NULL;
1404 }
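
/* In practice the hack above allows a dispatch callback to drop its own
 * source and close the fd right away, e.g. (hypothetical):
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             sd_event_source_unref(s);
 *             close(fd);
 *             return 0;
 *     }
 *
 * The fd is detached from epoll immediately, while the object itself is only
 * freed once source_dispatch() notices that n_ref dropped to zero. */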
1405
1406 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1407 assert_return(s, -EINVAL);
1408 assert_return(!event_pid_changed(s->event), -ECHILD);
1409
1410 return free_and_strdup(&s->description, description);
1411 }
1412
1413 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1414 assert_return(s, -EINVAL);
1415 assert_return(description, -EINVAL);
1416 assert_return(s->description, -ENXIO);
1417 assert_return(!event_pid_changed(s->event), -ECHILD);
1418
1419 *description = s->description;
1420 return 0;
1421 }
1422
1423 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1424 assert_return(s, NULL);
1425
1426 return s->event;
1427 }
1428
1429 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1430 assert_return(s, -EINVAL);
1431 assert_return(s->type != SOURCE_EXIT, -EDOM);
1432 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1433 assert_return(!event_pid_changed(s->event), -ECHILD);
1434
1435 return s->pending;
1436 }
1437
1438 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1439 assert_return(s, -EINVAL);
1440 assert_return(s->type == SOURCE_IO, -EDOM);
1441 assert_return(!event_pid_changed(s->event), -ECHILD);
1442
1443 return s->io.fd;
1444 }
1445
1446 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1447 int r;
1448
1449 assert_return(s, -EINVAL);
1450 assert_return(fd >= 0, -EBADF);
1451 assert_return(s->type == SOURCE_IO, -EDOM);
1452 assert_return(!event_pid_changed(s->event), -ECHILD);
1453
1454 if (s->io.fd == fd)
1455 return 0;
1456
1457 if (s->enabled == SD_EVENT_OFF) {
1458 s->io.fd = fd;
1459 s->io.registered = false;
1460 } else {
1461 int saved_fd;
1462
1463 saved_fd = s->io.fd;
1464 assert(s->io.registered);
1465
1466 s->io.fd = fd;
1467 s->io.registered = false;
1468
1469 r = source_io_register(s, s->enabled, s->io.events);
1470 if (r < 0) {
1471 s->io.fd = saved_fd;
1472 s->io.registered = true;
1473 return r;
1474 }
1475
1476 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1477 }
1478
1479 return 0;
1480 }
1481
1482 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1483 assert_return(s, -EINVAL);
1484 assert_return(events, -EINVAL);
1485 assert_return(s->type == SOURCE_IO, -EDOM);
1486 assert_return(!event_pid_changed(s->event), -ECHILD);
1487
1488 *events = s->io.events;
1489 return 0;
1490 }
1491
1492 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1493 int r;
1494
1495 assert_return(s, -EINVAL);
1496 assert_return(s->type == SOURCE_IO, -EDOM);
1497 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1498 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1499 assert_return(!event_pid_changed(s->event), -ECHILD);
1500
1501 /* edge-triggered updates are never skipped, so we can reset edges */
1502 if (s->io.events == events && !(events & EPOLLET))
1503 return 0;
1504
1505 if (s->enabled != SD_EVENT_OFF) {
1506 r = source_io_register(s, s->enabled, events);
1507 if (r < 0)
1508 return r;
1509 }
1510
1511 s->io.events = events;
1512 source_set_pending(s, false);
1513
1514 return 0;
1515 }
1516
1517 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1518 assert_return(s, -EINVAL);
1519 assert_return(revents, -EINVAL);
1520 assert_return(s->type == SOURCE_IO, -EDOM);
1521 assert_return(s->pending, -ENODATA);
1522 assert_return(!event_pid_changed(s->event), -ECHILD);
1523
1524 *revents = s->io.revents;
1525 return 0;
1526 }
1527
1528 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1529 assert_return(s, -EINVAL);
1530 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1531 assert_return(!event_pid_changed(s->event), -ECHILD);
1532
1533 return s->signal.sig;
1534 }
1535
1536 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1537 assert_return(s, -EINVAL);
1538 assert_return(!event_pid_changed(s->event), -ECHILD);
1539
1540         *priority = s->priority;
1541         return 0;
1542 }
1542
1543 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1544 int r;
1545
1546 assert_return(s, -EINVAL);
1547 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1548 assert_return(!event_pid_changed(s->event), -ECHILD);
1549
1550 if (s->priority == priority)
1551 return 0;
1552
1553 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1554 struct signal_data *old, *d;
1555
1556 /* Move us from the signalfd belonging to the old
1557 * priority to the signalfd of the new priority */
1558
1559 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1560
1561 s->priority = priority;
1562
1563 r = event_make_signal_data(s->event, s->signal.sig, &d);
1564 if (r < 0) {
1565 s->priority = old->priority;
1566 return r;
1567 }
1568
1569 event_unmask_signal_data(s->event, old, s->signal.sig);
1570 } else
1571 s->priority = priority;
1572
1573 if (s->pending)
1574 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1575
1576 if (s->prepare)
1577 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1578
1579 if (s->type == SOURCE_EXIT)
1580 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1581
1582 return 0;
1583 }
1584
1585 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1586 assert_return(s, -EINVAL);
1587 assert_return(m, -EINVAL);
1588 assert_return(!event_pid_changed(s->event), -ECHILD);
1589
1590 *m = s->enabled;
1591 return 0;
1592 }
1593
1594 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1595 int r;
1596
1597 assert_return(s, -EINVAL);
1598 assert_return(m == SD_EVENT_OFF || m == SD_EVENT_ON || m == SD_EVENT_ONESHOT, -EINVAL);
1599 assert_return(!event_pid_changed(s->event), -ECHILD);
1600
1601 /* If we are dead anyway, we are fine with turning off
1602 * sources, but everything else needs to fail. */
1603 if (s->event->state == SD_EVENT_FINISHED)
1604 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1605
1606 if (s->enabled == m)
1607 return 0;
1608
1609 if (m == SD_EVENT_OFF) {
1610
1611 switch (s->type) {
1612
1613 case SOURCE_IO:
1614 source_io_unregister(s);
1615 s->enabled = m;
1616 break;
1617
1618 case SOURCE_TIME_REALTIME:
1619 case SOURCE_TIME_BOOTTIME:
1620 case SOURCE_TIME_MONOTONIC:
1621 case SOURCE_TIME_REALTIME_ALARM:
1622 case SOURCE_TIME_BOOTTIME_ALARM: {
1623 struct clock_data *d;
1624
1625 s->enabled = m;
1626 d = event_get_clock_data(s->event, s->type);
1627 assert(d);
1628
1629 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1630 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1631 d->needs_rearm = true;
1632 break;
1633 }
1634
1635 case SOURCE_SIGNAL:
1636 s->enabled = m;
1637
1638 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1639 break;
1640
1641 case SOURCE_CHILD:
1642 s->enabled = m;
1643
1644 assert(s->event->n_enabled_child_sources > 0);
1645 s->event->n_enabled_child_sources--;
1646
1647 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1648 break;
1649
1650 case SOURCE_EXIT:
1651 s->enabled = m;
1652 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1653 break;
1654
1655 case SOURCE_DEFER:
1656 case SOURCE_POST:
1657 s->enabled = m;
1658 break;
1659
1660 default:
1661 assert_not_reached("Wut? I shouldn't exist.");
1662 }
1663
1664 } else {
1665 switch (s->type) {
1666
1667 case SOURCE_IO:
1668 r = source_io_register(s, m, s->io.events);
1669 if (r < 0)
1670 return r;
1671
1672 s->enabled = m;
1673 break;
1674
1675 case SOURCE_TIME_REALTIME:
1676 case SOURCE_TIME_BOOTTIME:
1677 case SOURCE_TIME_MONOTONIC:
1678 case SOURCE_TIME_REALTIME_ALARM:
1679 case SOURCE_TIME_BOOTTIME_ALARM: {
1680 struct clock_data *d;
1681
1682 s->enabled = m;
1683 d = event_get_clock_data(s->event, s->type);
1684 assert(d);
1685
1686 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1687 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1688 d->needs_rearm = true;
1689 break;
1690 }
1691
1692 case SOURCE_SIGNAL:
1693
1694 s->enabled = m;
1695
1696 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1697 if (r < 0) {
1698 s->enabled = SD_EVENT_OFF;
1699 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1700 return r;
1701 }
1702
1703 break;
1704
1705 case SOURCE_CHILD:
1706
1707 if (s->enabled == SD_EVENT_OFF)
1708 s->event->n_enabled_child_sources++;
1709
1710 s->enabled = m;
1711
1712 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1713 if (r < 0) {
1714 s->enabled = SD_EVENT_OFF;
1715 s->event->n_enabled_child_sources--;
1716 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1717 return r;
1718 }
1719
1720 break;
1721
1722 case SOURCE_EXIT:
1723 s->enabled = m;
1724 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1725 break;
1726
1727 case SOURCE_DEFER:
1728 case SOURCE_POST:
1729 s->enabled = m;
1730 break;
1731
1732 default:
1733 assert_not_reached("Wut? I shouldn't exist.");
1734 }
1735 }
1736
1737 if (s->pending)
1738 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1739
1740 if (s->prepare)
1741 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1742
1743 return 0;
1744 }
1745
1746 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1747 assert_return(s, -EINVAL);
1748 assert_return(usec, -EINVAL);
1749 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1750 assert_return(!event_pid_changed(s->event), -ECHILD);
1751
1752 *usec = s->time.next;
1753 return 0;
1754 }
1755
1756 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1757 struct clock_data *d;
1758
1759 assert_return(s, -EINVAL);
1760 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1761 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1762 assert_return(!event_pid_changed(s->event), -ECHILD);
1763
1764 s->time.next = usec;
1765
1766 source_set_pending(s, false);
1767
1768 d = event_get_clock_data(s->event, s->type);
1769 assert(d);
1770
1771 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1772 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1773 d->needs_rearm = true;
1774
1775 return 0;
1776 }
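
/* One common pattern built on this call (a sketch, hypothetical callback): a
 * periodic timer. Time sources dispatch as SD_EVENT_ONESHOT by default, so
 * the callback pushes the deadline forward and re-enables the source:
 *
 *     static int on_tick(sd_event_source *s, uint64_t usec, void *userdata) {
 *             assert_se(sd_event_source_set_time(s, usec + USEC_PER_SEC) >= 0);
 *             assert_se(sd_event_source_set_enabled(s, SD_EVENT_ONESHOT) >= 0);
 *             return 0;
 *     }
 */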
1777
1778 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1779 assert_return(s, -EINVAL);
1780 assert_return(usec, -EINVAL);
1781 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1782 assert_return(!event_pid_changed(s->event), -ECHILD);
1783
1784 *usec = s->time.accuracy;
1785 return 0;
1786 }
1787
1788 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1789 struct clock_data *d;
1790
1791 assert_return(s, -EINVAL);
1792 assert_return(usec != (uint64_t) -1, -EINVAL);
1793 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1794 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1795 assert_return(!event_pid_changed(s->event), -ECHILD);
1796
1797 if (usec == 0)
1798 usec = DEFAULT_ACCURACY_USEC;
1799
1800 s->time.accuracy = usec;
1801
1802 source_set_pending(s, false);
1803
1804 d = event_get_clock_data(s->event, s->type);
1805 assert(d);
1806
1807 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1808 d->needs_rearm = true;
1809
1810 return 0;
1811 }
1812
1813 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1814 assert_return(s, -EINVAL);
1815 assert_return(clock, -EINVAL);
1816 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1817 assert_return(!event_pid_changed(s->event), -ECHILD);
1818
1819 *clock = event_source_type_to_clock(s->type);
1820 return 0;
1821 }
1822
1823 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1824 assert_return(s, -EINVAL);
1825 assert_return(pid, -EINVAL);
1826 assert_return(s->type == SOURCE_CHILD, -EDOM);
1827 assert_return(!event_pid_changed(s->event), -ECHILD);
1828
1829 *pid = s->child.pid;
1830 return 0;
1831 }
1832
1833 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1834 int r;
1835
1836 assert_return(s, -EINVAL);
1837 assert_return(s->type != SOURCE_EXIT, -EDOM);
1838 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1839 assert_return(!event_pid_changed(s->event), -ECHILD);
1840
1841 if (s->prepare == callback)
1842 return 0;
1843
1844 if (callback && s->prepare) {
1845 s->prepare = callback;
1846 return 0;
1847 }
1848
1849 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1850 if (r < 0)
1851 return r;
1852
1853 s->prepare = callback;
1854
1855 if (callback) {
1856 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1857 if (r < 0)
1858 return r;
1859 } else
1860 prioq_remove(s->event->prepare, s, &s->prepare_index);
1861
1862 return 0;
1863 }
1864
1865 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1866 assert_return(s, NULL);
1867
1868 return s->userdata;
1869 }
1870
1871 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1872 void *ret;
1873
1874 assert_return(s, NULL);
1875
1876 ret = s->userdata;
1877 s->userdata = userdata;
1878
1879 return ret;
1880 }
1881
1882 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1883 usec_t c;
1884 assert(e);
1885 assert(a <= b);
1886
1887 if (a <= 0)
1888 return 0;
1889 if (a >= USEC_INFINITY)
1890 return USEC_INFINITY;
1891
1892 if (b <= a + 1)
1893 return a;
1894
1895 initialize_perturb(e);
1896
1897 /*
1898 Find a good time to wake up again between times a and b. We
1899 have two goals here:
1900
1901 a) We want to wake up as seldom as possible, hence prefer
1902 later times over earlier times.
1903
1904 b) But if we have to wake up, then let's make sure to
1905 dispatch as much as possible on the entire system.
1906
1907 We implement this by waking up everywhere at the same time
1908 within any given minute if we can, synchronised via the
1909 perturbation value determined from the boot ID. If we can't,
1910            then we try to find the same spot within every 10s, then
1911            every 1s and then every 250ms. Otherwise, we pick the last
1912            possible time to wake up.
1913 */
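
        /*
          Worked example (hypothetical numbers): with a perturbation of 5.5s,
          a = M+2s and b = M+12s for some minute boundary M, the first
          candidate below is c = (b / 1min) * 1min + perturb = M+5.5s, which
          lies in [a, b), so all event loops on this machine (sharing the
          boot ID) wake at the same 5.5s offset into the minute, while
          machines with other boot IDs land on other offsets. Only if the
          minute grid yields nothing inside [a, b) do we fall through to the
          10s, 1s and 250ms grids.
        */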
1914
1915 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1916 if (c >= b) {
1917 if (_unlikely_(c < USEC_PER_MINUTE))
1918 return b;
1919
1920 c -= USEC_PER_MINUTE;
1921 }
1922
1923 if (c >= a)
1924 return c;
1925
1926 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1927 if (c >= b) {
1928 if (_unlikely_(c < USEC_PER_SEC*10))
1929 return b;
1930
1931 c -= USEC_PER_SEC*10;
1932 }
1933
1934 if (c >= a)
1935 return c;
1936
1937 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1938 if (c >= b) {
1939 if (_unlikely_(c < USEC_PER_SEC))
1940 return b;
1941
1942 c -= USEC_PER_SEC;
1943 }
1944
1945 if (c >= a)
1946 return c;
1947
1948 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1949 if (c >= b) {
1950 if (_unlikely_(c < USEC_PER_MSEC*250))
1951 return b;
1952
1953 c -= USEC_PER_MSEC*250;
1954 }
1955
1956 if (c >= a)
1957 return c;
1958
1959 return b;
1960 }
1961
1962 static int event_arm_timer(
1963 sd_event *e,
1964 struct clock_data *d) {
1965
1966 struct itimerspec its = {};
1967 sd_event_source *a, *b;
1968 usec_t t;
1969 int r;
1970
1971 assert(e);
1972 assert(d);
1973
1974 if (!d->needs_rearm)
1975 return 0;
1976 else
1977 d->needs_rearm = false;
1978
1979 a = prioq_peek(d->earliest);
1980 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
1981
1982 if (d->fd < 0)
1983 return 0;
1984
1985 if (d->next == USEC_INFINITY)
1986 return 0;
1987
1988 /* disarm */
1989 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
1990 if (r < 0)
1991 return r;
1992
1993 d->next = USEC_INFINITY;
1994 return 0;
1995 }
1996
1997 b = prioq_peek(d->latest);
1998 assert_se(b && b->enabled != SD_EVENT_OFF);
1999
2000 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2001 if (d->next == t)
2002 return 0;
2003
2004 assert_se(d->fd >= 0);
2005
2006 if (t == 0) {
2007                 /* We don't want to disarm here, just mean some time looooong ago. */
2008 its.it_value.tv_sec = 0;
2009 its.it_value.tv_nsec = 1;
2010 } else
2011 timespec_store(&its.it_value, t);
2012
2013 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2014 if (r < 0)
2015 return -errno;
2016
2017 d->next = t;
2018 return 0;
2019 }
2020
2021 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2022 assert(e);
2023 assert(s);
2024 assert(s->type == SOURCE_IO);
2025
2026 /* If the event source was already pending, we just OR in the
2027 * new revents, otherwise we reset the value. The ORing is
2028 * necessary to handle EPOLLONESHOT events properly where
2029 * readability might happen independently of writability, and
2030 * we need to keep track of both */
2031
2032 if (s->pending)
2033 s->io.revents |= revents;
2034 else
2035 s->io.revents = revents;
2036
2037 return source_set_pending(s, true);
2038 }
2039
2040 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2041 uint64_t x;
2042 ssize_t ss;
2043
2044 assert(e);
2045 assert(fd >= 0);
2046
2047 assert_return(events == EPOLLIN, -EIO);
2048
2049 ss = read(fd, &x, sizeof(x));
2050 if (ss < 0) {
2051 if (errno == EAGAIN || errno == EINTR)
2052 return 0;
2053
2054 return -errno;
2055 }
2056
2057 if (_unlikely_(ss != sizeof(x)))
2058 return -EIO;
2059
2060 if (next)
2061 *next = USEC_INFINITY;
2062
2063 return 0;
2064 }
2065
2066 static int process_timer(
2067 sd_event *e,
2068 usec_t n,
2069 struct clock_data *d) {
2070
2071 sd_event_source *s;
2072 int r;
2073
2074 assert(e);
2075 assert(d);
2076
2077 for (;;) {
2078 s = prioq_peek(d->earliest);
2079 if (!s ||
2080 s->time.next > n ||
2081 s->enabled == SD_EVENT_OFF ||
2082 s->pending)
2083 break;
2084
2085 r = source_set_pending(s, true);
2086 if (r < 0)
2087 return r;
2088
2089 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2090 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2091 d->needs_rearm = true;
2092 }
2093
2094 return 0;
2095 }
2096
2097 static int process_child(sd_event *e) {
2098 sd_event_source *s;
2099 Iterator i;
2100 int r;
2101
2102 assert(e);
2103
2104 e->need_process_child = false;
2105
2106 /*
2107 So, this is ugly. We iteratively invoke waitid() with P_PID
2108 + WNOHANG for each PID we wait for, instead of using
2109 P_ALL. This is because we only want to get child
2110 information of very specific child processes, and not all
2111            of them. We might not have processed the SIGCHLD event of a
2112            previous invocation and we don't want to maintain an
2113            unbounded *per-child* event queue, hence we really don't
2114 want anything flushed out of the kernel's queue that we
2115 don't care about. Since this is O(n) this means that if you
2116 have a lot of processes you probably want to handle SIGCHLD
2117 yourself.
2118
2119            We do not reap the children here (hence WNOWAIT); that is
2120            only done after the event source is dispatched, so that
2121            the callback still sees the process as a zombie.
2122 */
2123
2124 HASHMAP_FOREACH(s, e->child_sources, i) {
2125 assert(s->type == SOURCE_CHILD);
2126
2127 if (s->pending)
2128 continue;
2129
2130 if (s->enabled == SD_EVENT_OFF)
2131 continue;
2132
2133 zero(s->child.siginfo);
2134 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2135 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2136 if (r < 0)
2137 return -errno;
2138
2139 if (s->child.siginfo.si_pid != 0) {
2140 bool zombie =
2141 s->child.siginfo.si_code == CLD_EXITED ||
2142 s->child.siginfo.si_code == CLD_KILLED ||
2143 s->child.siginfo.si_code == CLD_DUMPED;
2144
2145 if (!zombie && (s->child.options & WEXITED)) {
2146 /* If the child isn't dead then let's
2147 * immediately remove the state change
2148 * from the queue, since there's no
2149 * benefit in leaving it queued */
2150
2151 assert(s->child.options & (WSTOPPED|WCONTINUED));
2152 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2153 }
2154
2155 r = source_set_pending(s, true);
2156 if (r < 0)
2157 return r;
2158 }
2159 }
2160
2161 return 0;
2162 }
2163
2164 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2165 bool read_one = false;
2166 int r;
2167
2168 assert(e);
2169 assert_return(events == EPOLLIN, -EIO);
2170
2171         /* If there's a signal queued on this priority and SIGCHLD is
2172            on this priority too, then make sure to recheck the
2173            children we watch. This is because we only ever dequeue
2174            the first signal per priority: if SIGCHLD is enqueued
2175            behind the one we dequeue, we wouldn't notice it, yet we
2176            might have higher-priority children we care about, hence we
2177            need to check for them explicitly. */
2178
2179 if (sigismember(&d->sigset, SIGCHLD))
2180 e->need_process_child = true;
2181
2182 /* If there's already an event source pending for this
2183 * priority we don't read another */
2184 if (d->current)
2185 return 0;
2186
2187 for (;;) {
2188 struct signalfd_siginfo si;
2189 ssize_t n;
2190 sd_event_source *s = NULL;
2191
2192 n = read(d->fd, &si, sizeof(si));
2193 if (n < 0) {
2194 if (errno == EAGAIN || errno == EINTR)
2195 return read_one;
2196
2197 return -errno;
2198 }
2199
2200 if (_unlikely_(n != sizeof(si)))
2201 return -EIO;
2202
2203 assert(si.ssi_signo < _NSIG);
2204
2205 read_one = true;
2206
2207 if (e->signal_sources)
2208 s = e->signal_sources[si.ssi_signo];
2209 if (!s)
2210 continue;
2211 if (s->pending)
2212 continue;
2213
2214 s->signal.siginfo = si;
2215 d->current = s;
2216
2217 r = source_set_pending(s, true);
2218 if (r < 0)
2219 return r;
2220
2221 return 1;
2222 }
2223 }
2224
2225 static int source_dispatch(sd_event_source *s) {
2226 int r = 0;
2227
2228 assert(s);
2229 assert(s->pending || s->type == SOURCE_EXIT);
2230
2231 if (s->type != SOURCE_DEFER && s->type != SOURCE_EXIT) {
2232 r = source_set_pending(s, false);
2233 if (r < 0)
2234 return r;
2235 }
2236
2237 if (s->type != SOURCE_POST) {
2238 sd_event_source *z;
2239 Iterator i;
2240
2241 /* If we execute a non-post source, let's mark all
2242 * post sources as pending */
2243
2244 SET_FOREACH(z, s->event->post_sources, i) {
2245 if (z->enabled == SD_EVENT_OFF)
2246 continue;
2247
2248 r = source_set_pending(z, true);
2249 if (r < 0)
2250 return r;
2251 }
2252 }
2253
2254 if (s->enabled == SD_EVENT_ONESHOT) {
2255 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2256 if (r < 0)
2257 return r;
2258 }
2259
2260 s->dispatching = true;
2261
2262 switch (s->type) {
2263
2264 case SOURCE_IO:
2265 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2266 break;
2267
2268 case SOURCE_TIME_REALTIME:
2269 case SOURCE_TIME_BOOTTIME:
2270 case SOURCE_TIME_MONOTONIC:
2271 case SOURCE_TIME_REALTIME_ALARM:
2272 case SOURCE_TIME_BOOTTIME_ALARM:
2273 r = s->time.callback(s, s->time.next, s->userdata);
2274 break;
2275
2276 case SOURCE_SIGNAL:
2277 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2278 break;
2279
2280 case SOURCE_CHILD: {
2281 bool zombie;
2282
2283 zombie = s->child.siginfo.si_code == CLD_EXITED ||
2284 s->child.siginfo.si_code == CLD_KILLED ||
2285 s->child.siginfo.si_code == CLD_DUMPED;
2286
2287 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2288
2289 /* Now, reap the PID for good. */
2290 if (zombie)
2291 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2292
2293 break;
2294 }
2295
2296 case SOURCE_DEFER:
2297 r = s->defer.callback(s, s->userdata);
2298 break;
2299
2300 case SOURCE_POST:
2301 r = s->post.callback(s, s->userdata);
2302 break;
2303
2304 case SOURCE_EXIT:
2305 r = s->exit.callback(s, s->userdata);
2306 break;
2307
2308 case SOURCE_WATCHDOG:
2309 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2310 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2311 assert_not_reached("Wut? I shouldn't exist.");
2312 }
2313
2314 s->dispatching = false;
2315
2316 if (r < 0)
2317 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2318 strna(s->description), event_source_type_to_string(s->type));
2319
2320 if (s->n_ref == 0)
2321 source_free(s);
2322 else if (r < 0)
2323 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2324
2325 return 1;
2326 }
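/* Note the contract implemented above: an SD_EVENT_ONESHOT source is
* disabled before its callback runs, a negative return from the
* callback only disables that one source (the loop itself keeps
* running), and a source whose last reference was dropped from inside
* its own callback (n_ref == 0) is freed right here. An illustrative
* callback that retires itself on hangup could thus simply be:
*
*     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
*             if (revents & EPOLLHUP)
*                     return -ECONNRESET;
*             return 0;
*     }
*
* after which this function logs the error at debug level and turns
* the source off. */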
2327
2328 static int event_prepare(sd_event *e) {
2329 int r;
2330
2331 assert(e);
2332
2333 for (;;) {
2334 sd_event_source *s;
2335
2336 s = prioq_peek(e->prepare);
2337 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2338 break;
2339
2340 s->prepare_iteration = e->iteration;
2341 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2342 if (r < 0)
2343 return r;
2344
2345 assert(s->prepare);
2346
2347 s->dispatching = true;
2348 r = s->prepare(s, s->userdata);
2349 s->dispatching = false;
2350
2351 if (r < 0)
2352 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2353 strna(s->description), event_source_type_to_string(s->type));
2354
2355 if (s->n_ref == 0)
2356 source_free(s);
2357 else if (r < 0)
2358 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2359 }
2360
2361 return 0;
2362 }
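/* Prepare callbacks, installed with sd_event_source_set_prepare(), run
* here at most once per iteration, in priority order, just before the
* loop polls. A typical use is toggling a source based on state that
* is only known right before waiting; for illustration (struct backend
* and backend_has_work() are made up for this sketch):
*
*     static int on_prepare(sd_event_source *s, void *userdata) {
*             struct backend *b = userdata;
*
*             return sd_event_source_set_enabled(s, backend_has_work(b) ? SD_EVENT_ON : SD_EVENT_OFF);
*     }
*/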
2363
2364 static int dispatch_exit(sd_event *e) {
2365 sd_event_source *p;
2366 int r;
2367
2368 assert(e);
2369
2370 p = prioq_peek(e->exit);
2371 if (!p || p->enabled == SD_EVENT_OFF) {
2372 e->state = SD_EVENT_FINISHED;
2373 return 0;
2374 }
2375
2376 sd_event_ref(e);
2377 e->iteration++;
2378 e->state = SD_EVENT_EXITING;
2379
2380 r = source_dispatch(p);
2381
2382 e->state = SD_EVENT_INITIAL;
2383 sd_event_unref(e);
2384
2385 return r;
2386 }
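/* Exit sources (added with sd_event_add_exit()) are dispatched like
* this, one per iteration in priority order, once sd_event_exit() has
* been called; when no enabled exit source remains, the loop enters
* SD_EVENT_FINISHED and sd_event_loop() below returns the stored exit
* code. */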
2387
2388 static sd_event_source* event_next_pending(sd_event *e) {
2389 sd_event_source *p;
2390
2391 assert(e);
2392
2393 p = prioq_peek(e->pending);
2394 if (!p)
2395 return NULL;
2396
2397 if (p->enabled == SD_EVENT_OFF)
2398 return NULL;
2399
2400 return p;
2401 }
2402
2403 static int arm_watchdog(sd_event *e) {
2404 struct itimerspec its = {};
2405 usec_t t;
2406 int r;
2407
2408 assert(e);
2409 assert(e->watchdog_fd >= 0);
2410
2411 t = sleep_between(e,
2412 e->watchdog_last + (e->watchdog_period / 2),
2413 e->watchdog_last + (e->watchdog_period * 3 / 4));
2414
2415 timespec_store(&its.it_value, t);
2416
2417 /* Make sure we never set the watchdog to 0, which tells the
2418 * kernel to disable it. */
2419 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2420 its.it_value.tv_nsec = 1;
2421
2422 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2423 if (r < 0)
2424 return -errno;
2425
2426 return 0;
2427 }
2428
2429 static int process_watchdog(sd_event *e) {
2430 assert(e);
2431
2432 if (!e->watchdog)
2433 return 0;
2434
2435 /* Don't notify the watchdog too often */
2436 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2437 return 0;
2438
2439 sd_notify(false, "WATCHDOG=1");
2440 e->watchdog_last = e->timestamp.monotonic;
2441
2442 return arm_watchdog(e);
2443 }
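/* Taken together, the two functions above implement the watchdog
* logic: WATCHDOG=1 is sent at most once per quarter of the watchdog
* period, and the timer is re-armed via sleep_between() to fire
* somewhere between one half and three quarters of the period after
* the last ping. As a worked example, a 20s period means pings are
* throttled to at most one per 5s, and the wakeup lands 10-15s after
* the last ping, comfortably before the 20s deadline. */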
2444
2445 _public_ int sd_event_prepare(sd_event *e) {
2446 int r;
2447
2448 assert_return(e, -EINVAL);
2449 assert_return(!event_pid_changed(e), -ECHILD);
2450 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2451 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2452
2453 if (e->exit_requested)
2454 goto pending;
2455
2456 e->iteration++;
2457
2458 e->state = SD_EVENT_PREPARING;
2459 r = event_prepare(e);
2460 e->state = SD_EVENT_INITIAL;
2461 if (r < 0)
2462 return r;
2463
2464 r = event_arm_timer(e, &e->realtime);
2465 if (r < 0)
2466 return r;
2467
2468 r = event_arm_timer(e, &e->boottime);
2469 if (r < 0)
2470 return r;
2471
2472 r = event_arm_timer(e, &e->monotonic);
2473 if (r < 0)
2474 return r;
2475
2476 r = event_arm_timer(e, &e->realtime_alarm);
2477 if (r < 0)
2478 return r;
2479
2480 r = event_arm_timer(e, &e->boottime_alarm);
2481 if (r < 0)
2482 return r;
2483
2484 if (event_next_pending(e) || e->need_process_child)
2485 goto pending;
2486
2487 e->state = SD_EVENT_ARMED;
2488
2489 return 0;
2490
2491 pending:
2492 e->state = SD_EVENT_ARMED;
2493 r = sd_event_wait(e, 0);
2494 if (r == 0)
2495 e->state = SD_EVENT_ARMED;
2496
2497 return r;
2498 }
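/* sd_event_prepare(), sd_event_wait() and sd_event_dispatch() are
* split out like this so that the loop can be embedded into a foreign
* poll loop via sd_event_get_fd(); driving them by hand amounts to
* roughly what sd_event_run() does further below:
*
*     r = sd_event_prepare(e);
*     if (r == 0)
*             r = sd_event_wait(e, timeout);
*     if (r > 0)
*             r = sd_event_dispatch(e);
*/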
2499
2500 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2501 struct epoll_event *ev_queue;
2502 unsigned ev_queue_max;
2503 int r, m, i;
2504
2505 assert_return(e, -EINVAL);
2506 assert_return(!event_pid_changed(e), -ECHILD);
2507 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2508 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2509
2510 if (e->exit_requested) {
2511 e->state = SD_EVENT_PENDING;
2512 return 1;
2513 }
2514
2515 ev_queue_max = MAX(e->n_sources, 1u);
2516 ev_queue = newa(struct epoll_event, ev_queue_max);
2517
2518 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2519 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2520 if (m < 0) {
2521 if (errno == EINTR) {
2522 e->state = SD_EVENT_PENDING;
2523 return 1;
2524 }
2525
2526 r = -errno;
2527 goto finish;
2528 }
2529
2530 dual_timestamp_get(&e->timestamp);
2531 e->timestamp_boottime = now(CLOCK_BOOTTIME);
2532
2533 for (i = 0; i < m; i++) {
2534
2535 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2536 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2537 else {
2538 WakeupType *t = ev_queue[i].data.ptr;
2539
2540 switch (*t) {
2541
2542 case WAKEUP_EVENT_SOURCE:
2543 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2544 break;
2545
2546 case WAKEUP_CLOCK_DATA: {
2547 struct clock_data *d = ev_queue[i].data.ptr;
2548 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2549 break;
2550 }
2551
2552 case WAKEUP_SIGNAL_DATA:
2553 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2554 break;
2555
2556 default:
2557 assert_not_reached("Invalid wake-up pointer");
2558 }
2559 }
2560 if (r < 0)
2561 goto finish;
2562 }
2563
2564 r = process_watchdog(e);
2565 if (r < 0)
2566 goto finish;
2567
2568 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2569 if (r < 0)
2570 goto finish;
2571
2572 r = process_timer(e, e->timestamp_boottime, &e->boottime);
2573 if (r < 0)
2574 goto finish;
2575
2576 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2577 if (r < 0)
2578 goto finish;
2579
2580 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2581 if (r < 0)
2582 goto finish;
2583
2584 r = process_timer(e, e->timestamp_boottime, &e->boottime_alarm);
2585 if (r < 0)
2586 goto finish;
2587
2588 if (e->need_process_child) {
2589 r = process_child(e);
2590 if (r < 0)
2591 goto finish;
2592 }
2593
2594 if (event_next_pending(e)) {
2595 e->state = SD_EVENT_PENDING;
2596
2597 return 1;
2598 }
2599
2600 r = 0;
2601
2602 finish:
2603 e->state = SD_EVENT_INITIAL;
2604
2605 return r;
2606 }
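/* Note the timeout semantics above: the value is in microseconds,
* (uint64_t) -1 waits indefinitely, and anything else is rounded up to
* whole milliseconds for epoll_wait(), so e.g. a 100us timeout becomes
* 1ms. The function returns 1 (with the state set to SD_EVENT_PENDING)
* if there is something to dispatch, and 0 if the wait timed out. */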
2607
2608 _public_ int sd_event_dispatch(sd_event *e) {
2609 sd_event_source *p;
2610 int r;
2611
2612 assert_return(e, -EINVAL);
2613 assert_return(!event_pid_changed(e), -ECHILD);
2614 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2615 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2616
2617 if (e->exit_requested)
2618 return dispatch_exit(e);
2619
2620 p = event_next_pending(e);
2621 if (p) {
2622 sd_event_ref(e);
2623
2624 e->state = SD_EVENT_RUNNING;
2625 r = source_dispatch(p);
2626 e->state = SD_EVENT_INITIAL;
2627
2628 sd_event_unref(e);
2629
2630 return r;
2631 }
2632
2633 e->state = SD_EVENT_INITIAL;
2634
2635 return 1;
2636 }
2637
2638 static void event_log_delays(sd_event *e) {
2639 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2640 unsigned i;
2641 int o;
2642
2643 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2644 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2645 e->delays[i] = 0;
2646 }
2647 log_debug("Event loop iterations: %.*s", o, b);
2648 }
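/* The delays[] histogram dumped above is only maintained when
* e->profile_delays is set (wired up when the loop is created, via the
* SD_EVENT_PROFILE_DELAYS environment variable); bucket i counts the
* iterations whose latency between two consecutive sd_event_run()
* invocations fell into the i-th power-of-two range of microseconds. */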
2649
2650 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2651 int r;
2652
2653 assert_return(e, -EINVAL);
2654 assert_return(!event_pid_changed(e), -ECHILD);
2655 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2656 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2657
2658 if (e->profile_delays && e->last_run) {
2659 usec_t this_run;
2660 unsigned l;
2661
2662 this_run = now(CLOCK_MONOTONIC);
2663
2664 l = u64log2(this_run - e->last_run);
2665 assert(l < sizeof(e->delays));
2666 e->delays[l]++;
2667
2668 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2669 event_log_delays(e);
2670 e->last_log = this_run;
2671 }
2672 }
2673
2674 r = sd_event_prepare(e);
2675 if (r == 0)
2676 /* There was nothing? Then wait... */
2677 r = sd_event_wait(e, timeout);
2678
2679 if (e->profile_delays)
2680 e->last_run = now(CLOCK_MONOTONIC);
2681
2682 if (r > 0) {
2683 /* There's something now, so let's dispatch it */
2684 r = sd_event_dispatch(e);
2685 if (r < 0)
2686 return r;
2687
2688 return 1;
2689 }
2690
2691 return r;
2692 }
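/* Hence sd_event_run() returns > 0 if a source was dispatched, 0 if
* the timeout elapsed with nothing to do, and < 0 on error; a timeout
* of 0 makes it a single non-blocking iteration. */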
2693
2694 _public_ int sd_event_loop(sd_event *e) {
2695 int r;
2696
2697 assert_return(e, -EINVAL);
2698 assert_return(!event_pid_changed(e), -ECHILD);
2699 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2700
2701 sd_event_ref(e);
2702
2703 while (e->state != SD_EVENT_FINISHED) {
2704 r = sd_event_run(e, (uint64_t) -1);
2705 if (r < 0)
2706 goto finish;
2707 }
2708
2709 r = e->exit_code;
2710
2711 finish:
2712 sd_event_unref(e);
2713 return r;
2714 }
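/* For illustration, a minimal consumer of the API above could look
* like this (the defer handler and its use are hypothetical):
*
*     #include "sd-event.h"
*
*     static int on_defer(sd_event_source *s, void *userdata) {
*             return sd_event_exit(sd_event_source_get_event(s), 0);
*     }
*
*     int main(void) {
*             sd_event *e = NULL;
*
*             if (sd_event_default(&e) < 0)
*                     return 1;
*             (void) sd_event_add_defer(e, NULL, on_defer, NULL);
*             (void) sd_event_loop(e);
*             sd_event_unref(e);
*             return 0;
*     }
*/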
2715
2716 _public_ int sd_event_get_fd(sd_event *e) {
2717
2718 assert_return(e, -EINVAL);
2719 assert_return(!event_pid_changed(e), -ECHILD);
2720
2721 return e->epoll_fd;
2722 }
2723
2724 _public_ int sd_event_get_state(sd_event *e) {
2725 assert_return(e, -EINVAL);
2726 assert_return(!event_pid_changed(e), -ECHILD);
2727
2728 return e->state;
2729 }
2730
2731 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2732 assert_return(e, -EINVAL);
2733 assert_return(code, -EINVAL);
2734 assert_return(!event_pid_changed(e), -ECHILD);
2735
2736 if (!e->exit_requested)
2737 return -ENODATA;
2738
2739 *code = e->exit_code;
2740 return 0;
2741 }
2742
2743 _public_ int sd_event_exit(sd_event *e, int code) {
2744 assert_return(e, -EINVAL);
2745 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2746 assert_return(!event_pid_changed(e), -ECHILD);
2747
2748 e->exit_requested = true;
2749 e->exit_code = code;
2750
2751 return 0;
2752 }
2753
2754 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2755 assert_return(e, -EINVAL);
2756 assert_return(usec, -EINVAL);
2757 assert_return(!event_pid_changed(e), -ECHILD);
2758 assert_return(IN_SET(clock,
2759 CLOCK_REALTIME,
2760 CLOCK_REALTIME_ALARM,
2761 CLOCK_MONOTONIC,
2762 CLOCK_BOOTTIME,
2763 CLOCK_BOOTTIME_ALARM), -EOPNOTSUPP);
2764
2765 if (!dual_timestamp_is_set(&e->timestamp)) {
2766 /* Implicitly fall back to now() if we never ran
2767 * before and thus have no cached time. */
2768 *usec = now(clock);
2769 return 1;
2770 }
2771
2772 switch (clock) {
2773
2774 case CLOCK_REALTIME:
2775 case CLOCK_REALTIME_ALARM:
2776 *usec = e->timestamp.realtime;
2777 break;
2778
2779 case CLOCK_MONOTONIC:
2780 *usec = e->timestamp.monotonic;
2781 break;
2782
2783 default:
2784 *usec = e->timestamp_boottime;
2785 break;
2786 }
2787
2788 return 0;
2789 }
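/* In other words, sd_event_now() hands out the timestamp cached when
* the loop last woke up (returning 0), which is what event handlers
* should normally use, and only falls back to querying the clock live
* (returning 1) before the first iteration. */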
2790
2791 _public_ int sd_event_default(sd_event **ret) {
2792
2793 static thread_local sd_event *default_event = NULL;
2794 sd_event *e = NULL;
2795 int r;
2796
2797 if (!ret)
2798 return !!default_event;
2799
2800 if (default_event) {
2801 *ret = sd_event_ref(default_event);
2802 return 0;
2803 }
2804
2805 r = sd_event_new(&e);
2806 if (r < 0)
2807 return r;
2808
2809 e->default_event_ptr = &default_event;
2810 e->tid = gettid();
2811 default_event = e;
2812
2813 *ret = e;
2814 return 1;
2815 }
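/* The default event loop is per-thread (note the thread_local above):
* the first call on a thread allocates it and returns 1, subsequent
* calls return another reference to the same object and return 0, and
* a NULL ret merely reports whether one exists already. Callers own
* the returned reference and are expected to sd_event_unref() it. */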
2816
2817 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2818 assert_return(e, -EINVAL);
2819 assert_return(tid, -EINVAL);
2820 assert_return(!event_pid_changed(e), -ECHILD);
2821
2822 if (e->tid != 0) {
2823 *tid = e->tid;
2824 return 0;
2825 }
2826
2827 return -ENXIO;
2828 }
2829
2830 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2831 int r;
2832
2833 assert_return(e, -EINVAL);
2834 assert_return(!event_pid_changed(e), -ECHILD);
2835
2836 if (e->watchdog == !!b)
2837 return e->watchdog;
2838
2839 if (b) {
2840 struct epoll_event ev = {};
2841
2842 r = sd_watchdog_enabled(false, &e->watchdog_period);
2843 if (r <= 0)
2844 return r;
2845
2846 /* Issue first ping immediately */
2847 sd_notify(false, "WATCHDOG=1");
2848 e->watchdog_last = now(CLOCK_MONOTONIC);
2849
2850 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2851 if (e->watchdog_fd < 0)
2852 return -errno;
2853
2854 r = arm_watchdog(e);
2855 if (r < 0)
2856 goto fail;
2857
2858 ev.events = EPOLLIN;
2859 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2860
2861 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2862 if (r < 0) {
2863 r = -errno;
2864 goto fail;
2865 }
2866
2867 } else {
2868 if (e->watchdog_fd >= 0) {
2869 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2870 e->watchdog_fd = safe_close(e->watchdog_fd);
2871 }
2872 }
2873
2874 e->watchdog = !!b;
2875 return e->watchdog;
2876
2877 fail:
2878 e->watchdog_fd = safe_close(e->watchdog_fd);
2879 return r;
2880 }
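/* Enabling the watchdog only has an effect if the service manager
* actually armed one for this process, i.e. if sd_watchdog_enabled()
* finds WATCHDOG_USEC (and a matching WATCHDOG_PID) in the
* environment; with e.g. WatchdogSec=20s in the unit file this call
* returns 1 and the loop starts pinging the manager automatically, as
* described at arm_watchdog() above. */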
2881
2882 _public_ int sd_event_get_watchdog(sd_event *e) {
2883 assert_return(e, -EINVAL);
2884 assert_return(!event_pid_changed(e), -ECHILD);
2885
2886 return e->watchdog;
2887 }