1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2013 Lennart Poettering
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19 ***/
20
21 #include <sys/epoll.h>
22 #include <sys/timerfd.h>
23 #include <sys/wait.h>
24
25 #include "sd-daemon.h"
26 #include "sd-event.h"
27 #include "sd-id128.h"
28
29 #include "alloc-util.h"
30 #include "fd-util.h"
31 #include "hashmap.h"
32 #include "list.h"
33 #include "macro.h"
34 #include "missing.h"
35 #include "prioq.h"
36 #include "process-util.h"
37 #include "set.h"
38 #include "signal-util.h"
39 #include "string-table.h"
40 #include "string-util.h"
41 #include "time-util.h"
42 #include "util.h"
43
44 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
45
46 typedef enum EventSourceType {
47 SOURCE_IO,
48 SOURCE_TIME_REALTIME,
49 SOURCE_TIME_BOOTTIME,
50 SOURCE_TIME_MONOTONIC,
51 SOURCE_TIME_REALTIME_ALARM,
52 SOURCE_TIME_BOOTTIME_ALARM,
53 SOURCE_SIGNAL,
54 SOURCE_CHILD,
55 SOURCE_DEFER,
56 SOURCE_POST,
57 SOURCE_EXIT,
58 SOURCE_WATCHDOG,
59 _SOURCE_EVENT_SOURCE_TYPE_MAX,
60 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
61 } EventSourceType;
62
63 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
64 [SOURCE_IO] = "io",
65 [SOURCE_TIME_REALTIME] = "realtime",
66 [SOURCE_TIME_BOOTTIME] = "boottime",
67 [SOURCE_TIME_MONOTONIC] = "monotonic",
68 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
69 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
70 [SOURCE_SIGNAL] = "signal",
71 [SOURCE_CHILD] = "child",
72 [SOURCE_DEFER] = "defer",
73 [SOURCE_POST] = "post",
74 [SOURCE_EXIT] = "exit",
75 [SOURCE_WATCHDOG] = "watchdog",
76 };
77
78 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
79
80 /* All objects we use in epoll events start with this value, so that
81 * we know how to dispatch it */
82 typedef enum WakeupType {
83 WAKEUP_NONE,
84 WAKEUP_EVENT_SOURCE,
85 WAKEUP_CLOCK_DATA,
86 WAKEUP_SIGNAL_DATA,
87 _WAKEUP_TYPE_MAX,
88 _WAKEUP_TYPE_INVALID = -1,
89 } WakeupType;
90
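/* A minimal sketch of the resulting dispatch (illustrative only, assuming an
 * "ev" just returned by epoll_wait(); since every object we register with
 * epoll begins with a WakeupType field, ev.data.ptr can always be read
 * through a WakeupType pointer first):
 *
 *         WakeupType *t = ev.data.ptr;
 *
 *         switch (*t) {
 *         case WAKEUP_EVENT_SOURCE:
 *                 process_io(e, ev.data.ptr, ev.events);      // an sd_event_source
 *                 break;
 *         case WAKEUP_CLOCK_DATA:
 *                 // a struct clock_data, see flush_timer() below
 *                 break;
 *         case WAKEUP_SIGNAL_DATA:
 *                 process_signal(e, ev.data.ptr, ev.events);  // a struct signal_data
 *                 break;
 *         }
 */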
91 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
92
93 struct sd_event_source {
94 WakeupType wakeup;
95
96 unsigned n_ref;
97
98 sd_event *event;
99 void *userdata;
100 sd_event_handler_t prepare;
101
102 char *description;
103
104 EventSourceType type:5;
105 int enabled:3;
106 bool pending:1;
107 bool dispatching:1;
108 bool floating:1;
109
110 int64_t priority;
111 unsigned pending_index;
112 unsigned prepare_index;
113 uint64_t pending_iteration;
114 uint64_t prepare_iteration;
115
116 LIST_FIELDS(sd_event_source, sources);
117
118 union {
119 struct {
120 sd_event_io_handler_t callback;
121 int fd;
122 uint32_t events;
123 uint32_t revents;
124 bool registered:1;
125 bool owned:1;
126 } io;
127 struct {
128 sd_event_time_handler_t callback;
129 usec_t next, accuracy;
130 unsigned earliest_index;
131 unsigned latest_index;
132 } time;
133 struct {
134 sd_event_signal_handler_t callback;
135 struct signalfd_siginfo siginfo;
136 int sig;
137 } signal;
138 struct {
139 sd_event_child_handler_t callback;
140 siginfo_t siginfo;
141 pid_t pid;
142 int options;
143 } child;
144 struct {
145 sd_event_handler_t callback;
146 } defer;
147 struct {
148 sd_event_handler_t callback;
149 } post;
150 struct {
151 sd_event_handler_t callback;
152 unsigned prioq_index;
153 } exit;
154 };
155 };
156
157 struct clock_data {
158 WakeupType wakeup;
159 int fd;
160
161 /* For all clocks we maintain two priority queues each, one
162 * ordered by the earliest times the events may be
163 * dispatched, and one ordered by the latest times they must
164 * have been dispatched. The range between the top entries in
165 * the two prioqs is the time window we can freely schedule
166 * wakeups in */
167
168 Prioq *earliest;
169 Prioq *latest;
170 usec_t next;
171
172 bool needs_rearm:1;
173 };
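/* Worked example (illustrative): if the top of "earliest" says some source
 * may fire from t=5s on, and the top of "latest" says some source must have
 * fired by t=8s, then a single wakeup anywhere in [5s, 8s] is good enough for
 * every armed source; sleep_between() below picks a coalescing-friendly spot
 * inside that window. */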
174
175 struct signal_data {
176 WakeupType wakeup;
177
178 /* For each priority we maintain one signal fd, so that we
179 * only have to dequeue a single event per priority at a
180 * time. */
181
182 int fd;
183 int64_t priority;
184 sigset_t sigset;
185 sd_event_source *current;
186 };
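/* For example (illustrative): a SIGTERM source at priority -10 and a SIGINT
 * source at priority 0 get two separate signalfds, one per priority, so that
 * a queued SIGTERM can be dequeued and dispatched without us having to pull
 * the SIGINT out of the kernel queue first. */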
187
188 struct sd_event {
189 unsigned n_ref;
190
191 int epoll_fd;
192 int watchdog_fd;
193
194 Prioq *pending;
195 Prioq *prepare;
196
197 /* timerfd_create() only supports these five clocks so far. We
198 * can add support for more clocks when the kernel learns to
199 * deal with them, too. */
200 struct clock_data realtime;
201 struct clock_data boottime;
202 struct clock_data monotonic;
203 struct clock_data realtime_alarm;
204 struct clock_data boottime_alarm;
205
206 usec_t perturb;
207
208 sd_event_source **signal_sources; /* indexed by signal number */
209 Hashmap *signal_data; /* indexed by priority */
210
211 Hashmap *child_sources;
212 unsigned n_enabled_child_sources;
213
214 Set *post_sources;
215
216 Prioq *exit;
217
218 pid_t original_pid;
219
220 uint64_t iteration;
221 triple_timestamp timestamp;
222 int state;
223
224 bool exit_requested:1;
225 bool need_process_child:1;
226 bool watchdog:1;
227 bool profile_delays:1;
228
229 int exit_code;
230
231 pid_t tid;
232 sd_event **default_event_ptr;
233
234 usec_t watchdog_last, watchdog_period;
235
236 unsigned n_sources;
237
238 LIST_HEAD(sd_event_source, sources);
239
240 usec_t last_run, last_log;
241 unsigned delays[sizeof(usec_t) * 8];
242 };
243
244 static thread_local sd_event *default_event = NULL;
245
246 static void source_disconnect(sd_event_source *s);
247
248 static sd_event *event_resolve(sd_event *e) {
249 return e == SD_EVENT_DEFAULT ? default_event : e;
250 }
251
252 static int pending_prioq_compare(const void *a, const void *b) {
253 const sd_event_source *x = a, *y = b;
254
255 assert(x->pending);
256 assert(y->pending);
257
258 /* Enabled ones first */
259 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
260 return -1;
261 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
262 return 1;
263
264 /* Lower priority values first */
265 if (x->priority < y->priority)
266 return -1;
267 if (x->priority > y->priority)
268 return 1;
269
270 /* Older entries first */
271 if (x->pending_iteration < y->pending_iteration)
272 return -1;
273 if (x->pending_iteration > y->pending_iteration)
274 return 1;
275
276 return 0;
277 }
278
279 static int prepare_prioq_compare(const void *a, const void *b) {
280 const sd_event_source *x = a, *y = b;
281
282 assert(x->prepare);
283 assert(y->prepare);
284
285 /* Enabled ones first */
286 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
287 return -1;
288 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
289 return 1;
290
291 /* Move most recently prepared ones last, so that we can stop
292 * preparing as soon as we hit one that has already been
293 * prepared in the current iteration */
294 if (x->prepare_iteration < y->prepare_iteration)
295 return -1;
296 if (x->prepare_iteration > y->prepare_iteration)
297 return 1;
298
299 /* Lower priority values first */
300 if (x->priority < y->priority)
301 return -1;
302 if (x->priority > y->priority)
303 return 1;
304
305 return 0;
306 }
307
308 static int earliest_time_prioq_compare(const void *a, const void *b) {
309 const sd_event_source *x = a, *y = b;
310
311 assert(EVENT_SOURCE_IS_TIME(x->type));
312 assert(x->type == y->type);
313
314 /* Enabled ones first */
315 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
316 return -1;
317 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
318 return 1;
319
320 /* Move the pending ones to the end */
321 if (!x->pending && y->pending)
322 return -1;
323 if (x->pending && !y->pending)
324 return 1;
325
326 /* Order by time */
327 if (x->time.next < y->time.next)
328 return -1;
329 if (x->time.next > y->time.next)
330 return 1;
331
332 return 0;
333 }
334
335 static usec_t time_event_source_latest(const sd_event_source *s) {
336 return usec_add(s->time.next, s->time.accuracy);
337 }
338
339 static int latest_time_prioq_compare(const void *a, const void *b) {
340 const sd_event_source *x = a, *y = b;
341
342 assert(EVENT_SOURCE_IS_TIME(x->type));
343 assert(x->type == y->type);
344
345 /* Enabled ones first */
346 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
347 return -1;
348 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
349 return 1;
350
351 /* Move the pending ones to the end */
352 if (!x->pending && y->pending)
353 return -1;
354 if (x->pending && !y->pending)
355 return 1;
356
357 /* Order by time */
358 if (time_event_source_latest(x) < time_event_source_latest(y))
359 return -1;
360 if (time_event_source_latest(x) > time_event_source_latest(y))
361 return 1;
362
363 return 0;
364 }
365
366 static int exit_prioq_compare(const void *a, const void *b) {
367 const sd_event_source *x = a, *y = b;
368
369 assert(x->type == SOURCE_EXIT);
370 assert(y->type == SOURCE_EXIT);
371
372 /* Enabled ones first */
373 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
374 return -1;
375 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
376 return 1;
377
378 /* Lower priority values first */
379 if (x->priority < y->priority)
380 return -1;
381 if (x->priority > y->priority)
382 return 1;
383
384 return 0;
385 }
386
387 static void free_clock_data(struct clock_data *d) {
388 assert(d);
389 assert(d->wakeup == WAKEUP_CLOCK_DATA);
390
391 safe_close(d->fd);
392 prioq_free(d->earliest);
393 prioq_free(d->latest);
394 }
395
396 static void event_free(sd_event *e) {
397 sd_event_source *s;
398
399 assert(e);
400
401 while ((s = e->sources)) {
402 assert(s->floating);
403 source_disconnect(s);
404 sd_event_source_unref(s);
405 }
406
407 assert(e->n_sources == 0);
408
409 if (e->default_event_ptr)
410 *(e->default_event_ptr) = NULL;
411
412 safe_close(e->epoll_fd);
413 safe_close(e->watchdog_fd);
414
415 free_clock_data(&e->realtime);
416 free_clock_data(&e->boottime);
417 free_clock_data(&e->monotonic);
418 free_clock_data(&e->realtime_alarm);
419 free_clock_data(&e->boottime_alarm);
420
421 prioq_free(e->pending);
422 prioq_free(e->prepare);
423 prioq_free(e->exit);
424
425 free(e->signal_sources);
426 hashmap_free(e->signal_data);
427
428 hashmap_free(e->child_sources);
429 set_free(e->post_sources);
430 free(e);
431 }
432
433 _public_ int sd_event_new(sd_event** ret) {
434 sd_event *e;
435 int r;
436
437 assert_return(ret, -EINVAL);
438
439 e = new0(sd_event, 1);
440 if (!e)
441 return -ENOMEM;
442
443 e->n_ref = 1;
444 e->watchdog_fd = e->epoll_fd = e->realtime.fd = e->boottime.fd = e->monotonic.fd = e->realtime_alarm.fd = e->boottime_alarm.fd = -1;
445 e->realtime.next = e->boottime.next = e->monotonic.next = e->realtime_alarm.next = e->boottime_alarm.next = USEC_INFINITY;
446 e->realtime.wakeup = e->boottime.wakeup = e->monotonic.wakeup = e->realtime_alarm.wakeup = e->boottime_alarm.wakeup = WAKEUP_CLOCK_DATA;
447 e->original_pid = getpid_cached();
448 e->perturb = USEC_INFINITY;
449
450 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
451 if (r < 0)
452 goto fail;
453
454 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
455 if (e->epoll_fd < 0) {
456 r = -errno;
457 goto fail;
458 }
459
460 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
461 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
462 e->profile_delays = true;
463 }
464
465 *ret = e;
466 return 0;
467
468 fail:
469 event_free(e);
470 return r;
471 }
472
473 _public_ sd_event* sd_event_ref(sd_event *e) {
474
475 if (!e)
476 return NULL;
477
478 assert(e->n_ref >= 1);
479 e->n_ref++;
480
481 return e;
482 }
483
484 _public_ sd_event* sd_event_unref(sd_event *e) {
485
486 if (!e)
487 return NULL;
488
489 assert(e->n_ref >= 1);
490 e->n_ref--;
491
492 if (e->n_ref <= 0)
493 event_free(e);
494
495 return NULL;
496 }
497
498 static bool event_pid_changed(sd_event *e) {
499 assert(e);
500
501 /* We don't support people creating an event loop and keeping
502 * it around over a fork(). Let's complain. */
503
504 return e->original_pid != getpid_cached();
505 }
506
507 static void source_io_unregister(sd_event_source *s) {
508 int r;
509
510 assert(s);
511 assert(s->type == SOURCE_IO);
512
513 if (event_pid_changed(s->event))
514 return;
515
516 if (!s->io.registered)
517 return;
518
519 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
520 if (r < 0)
521 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
522 strna(s->description), event_source_type_to_string(s->type));
523
524 s->io.registered = false;
525 }
526
527 static int source_io_register(
528 sd_event_source *s,
529 int enabled,
530 uint32_t events) {
531
532 struct epoll_event ev = {};
533 int r;
534
535 assert(s);
536 assert(s->type == SOURCE_IO);
537 assert(enabled != SD_EVENT_OFF);
538
539 ev.events = events;
540 ev.data.ptr = s;
541
542 if (enabled == SD_EVENT_ONESHOT)
543 ev.events |= EPOLLONESHOT;
544
545 if (s->io.registered)
546 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
547 else
548 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
549 if (r < 0)
550 return -errno;
551
552 s->io.registered = true;
553
554 return 0;
555 }
556
557 static clockid_t event_source_type_to_clock(EventSourceType t) {
558
559 switch (t) {
560
561 case SOURCE_TIME_REALTIME:
562 return CLOCK_REALTIME;
563
564 case SOURCE_TIME_BOOTTIME:
565 return CLOCK_BOOTTIME;
566
567 case SOURCE_TIME_MONOTONIC:
568 return CLOCK_MONOTONIC;
569
570 case SOURCE_TIME_REALTIME_ALARM:
571 return CLOCK_REALTIME_ALARM;
572
573 case SOURCE_TIME_BOOTTIME_ALARM:
574 return CLOCK_BOOTTIME_ALARM;
575
576 default:
577 return (clockid_t) -1;
578 }
579 }
580
581 static EventSourceType clock_to_event_source_type(clockid_t clock) {
582
583 switch (clock) {
584
585 case CLOCK_REALTIME:
586 return SOURCE_TIME_REALTIME;
587
588 case CLOCK_BOOTTIME:
589 return SOURCE_TIME_BOOTTIME;
590
591 case CLOCK_MONOTONIC:
592 return SOURCE_TIME_MONOTONIC;
593
594 case CLOCK_REALTIME_ALARM:
595 return SOURCE_TIME_REALTIME_ALARM;
596
597 case CLOCK_BOOTTIME_ALARM:
598 return SOURCE_TIME_BOOTTIME_ALARM;
599
600 default:
601 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
602 }
603 }
604
605 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
606 assert(e);
607
608 switch (t) {
609
610 case SOURCE_TIME_REALTIME:
611 return &e->realtime;
612
613 case SOURCE_TIME_BOOTTIME:
614 return &e->boottime;
615
616 case SOURCE_TIME_MONOTONIC:
617 return &e->monotonic;
618
619 case SOURCE_TIME_REALTIME_ALARM:
620 return &e->realtime_alarm;
621
622 case SOURCE_TIME_BOOTTIME_ALARM:
623 return &e->boottime_alarm;
624
625 default:
626 return NULL;
627 }
628 }
629
630 static int event_make_signal_data(
631 sd_event *e,
632 int sig,
633 struct signal_data **ret) {
634
635 struct epoll_event ev = {};
636 struct signal_data *d;
637 bool added = false;
638 sigset_t ss_copy;
639 int64_t priority;
640 int r;
641
642 assert(e);
643
644 if (event_pid_changed(e))
645 return -ECHILD;
646
647 if (e->signal_sources && e->signal_sources[sig])
648 priority = e->signal_sources[sig]->priority;
649 else
650 priority = 0;
651
652 d = hashmap_get(e->signal_data, &priority);
653 if (d) {
654 if (sigismember(&d->sigset, sig) > 0) {
655 if (ret)
656 *ret = d;
657 return 0;
658 }
659 } else {
660 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
661 if (r < 0)
662 return r;
663
664 d = new0(struct signal_data, 1);
665 if (!d)
666 return -ENOMEM;
667
668 d->wakeup = WAKEUP_SIGNAL_DATA;
669 d->fd = -1;
670 d->priority = priority;
671
672 r = hashmap_put(e->signal_data, &d->priority, d);
673 if (r < 0) {
674 free(d);
675 return r;
676 }
677
678 added = true;
679 }
680
681 ss_copy = d->sigset;
682 assert_se(sigaddset(&ss_copy, sig) >= 0);
683
684 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
685 if (r < 0) {
686 r = -errno;
687 goto fail;
688 }
689
690 d->sigset = ss_copy;
691
692 if (d->fd >= 0) {
693 if (ret)
694 *ret = d;
695 return 0;
696 }
697
698 d->fd = r;
699
700 ev.events = EPOLLIN;
701 ev.data.ptr = d;
702
703 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
704 if (r < 0) {
705 r = -errno;
706 goto fail;
707 }
708
709 if (ret)
710 *ret = d;
711
712 return 0;
713
714 fail:
715 if (added) {
716 d->fd = safe_close(d->fd);
717 hashmap_remove(e->signal_data, &d->priority);
718 free(d);
719 }
720
721 return r;
722 }
723
724 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
725 assert(e);
726 assert(d);
727
728 /* Turns off the specified signal in the signal data
729 * object. If the signal mask of the object becomes empty
730 * that way, the object is removed entirely. */
731
732 if (sigismember(&d->sigset, sig) == 0)
733 return;
734
735 assert_se(sigdelset(&d->sigset, sig) >= 0);
736
737 if (sigisemptyset(&d->sigset)) {
738
739 /* If the mask is all-zero we can get rid of the structure */
740 hashmap_remove(e->signal_data, &d->priority);
741 safe_close(d->fd);
742 free(d);
743 return;
744 }
745
746 assert(d->fd >= 0);
747
748 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
749 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
750 }
751
752 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
753 struct signal_data *d;
754 static const int64_t zero_priority = 0;
755
756 assert(e);
757
758 /* Rechecks if the specified signal is still something we are
759 * interested in. If not, we'll unmask it, and possibly drop
760 * the signalfd for it. */
761
762 if (sig == SIGCHLD &&
763 e->n_enabled_child_sources > 0)
764 return;
765
766 if (e->signal_sources &&
767 e->signal_sources[sig] &&
768 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
769 return;
770
771 /*
772 * The specified signal might be enabled in three different queues:
773 *
774 * 1) the one that belongs to the priority passed (if it is non-NULL)
775 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
776 * 3) the 0 priority (to cover the SIGCHLD case)
777 *
778 * Hence, let's remove it from all three here.
779 */
780
781 if (priority) {
782 d = hashmap_get(e->signal_data, priority);
783 if (d)
784 event_unmask_signal_data(e, d, sig);
785 }
786
787 if (e->signal_sources && e->signal_sources[sig]) {
788 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
789 if (d)
790 event_unmask_signal_data(e, d, sig);
791 }
792
793 d = hashmap_get(e->signal_data, &zero_priority);
794 if (d)
795 event_unmask_signal_data(e, d, sig);
796 }
797
798 static void source_disconnect(sd_event_source *s) {
799 sd_event *event;
800
801 assert(s);
802
803 if (!s->event)
804 return;
805
806 assert(s->event->n_sources > 0);
807
808 switch (s->type) {
809
810 case SOURCE_IO:
811 if (s->io.fd >= 0)
812 source_io_unregister(s);
813
814 break;
815
816 case SOURCE_TIME_REALTIME:
817 case SOURCE_TIME_BOOTTIME:
818 case SOURCE_TIME_MONOTONIC:
819 case SOURCE_TIME_REALTIME_ALARM:
820 case SOURCE_TIME_BOOTTIME_ALARM: {
821 struct clock_data *d;
822
823 d = event_get_clock_data(s->event, s->type);
824 assert(d);
825
826 prioq_remove(d->earliest, s, &s->time.earliest_index);
827 prioq_remove(d->latest, s, &s->time.latest_index);
828 d->needs_rearm = true;
829 break;
830 }
831
832 case SOURCE_SIGNAL:
833 if (s->signal.sig > 0) {
834
835 if (s->event->signal_sources)
836 s->event->signal_sources[s->signal.sig] = NULL;
837
838 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
839 }
840
841 break;
842
843 case SOURCE_CHILD:
844 if (s->child.pid > 0) {
845 if (s->enabled != SD_EVENT_OFF) {
846 assert(s->event->n_enabled_child_sources > 0);
847 s->event->n_enabled_child_sources--;
848 }
849
850 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
851 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
852 }
853
854 break;
855
856 case SOURCE_DEFER:
857 /* nothing */
858 break;
859
860 case SOURCE_POST:
861 set_remove(s->event->post_sources, s);
862 break;
863
864 case SOURCE_EXIT:
865 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
866 break;
867
868 default:
869 assert_not_reached("Wut? I shouldn't exist.");
870 }
871
872 if (s->pending)
873 prioq_remove(s->event->pending, s, &s->pending_index);
874
875 if (s->prepare)
876 prioq_remove(s->event->prepare, s, &s->prepare_index);
877
878 event = s->event;
879
880 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
881 s->event = NULL;
882 LIST_REMOVE(sources, event->sources, s);
883 event->n_sources--;
884
885 if (!s->floating)
886 sd_event_unref(event);
887 }
888
889 static void source_free(sd_event_source *s) {
890 assert(s);
891
892 source_disconnect(s);
893
894 if (s->type == SOURCE_IO && s->io.owned)
895 safe_close(s->io.fd);
896
897 free(s->description);
898 free(s);
899 }
900
901 static int source_set_pending(sd_event_source *s, bool b) {
902 int r;
903
904 assert(s);
905 assert(s->type != SOURCE_EXIT);
906
907 if (s->pending == b)
908 return 0;
909
910 s->pending = b;
911
912 if (b) {
913 s->pending_iteration = s->event->iteration;
914
915 r = prioq_put(s->event->pending, s, &s->pending_index);
916 if (r < 0) {
917 s->pending = false;
918 return r;
919 }
920 } else
921 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
922
923 if (EVENT_SOURCE_IS_TIME(s->type)) {
924 struct clock_data *d;
925
926 d = event_get_clock_data(s->event, s->type);
927 assert(d);
928
929 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
930 prioq_reshuffle(d->latest, s, &s->time.latest_index);
931 d->needs_rearm = true;
932 }
933
934 if (s->type == SOURCE_SIGNAL && !b) {
935 struct signal_data *d;
936
937 d = hashmap_get(s->event->signal_data, &s->priority);
938 if (d && d->current == s)
939 d->current = NULL;
940 }
941
942 return 0;
943 }
944
945 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
946 sd_event_source *s;
947
948 assert(e);
949
950 s = new0(sd_event_source, 1);
951 if (!s)
952 return NULL;
953
954 s->n_ref = 1;
955 s->event = e;
956 s->floating = floating;
957 s->type = type;
958 s->pending_index = s->prepare_index = PRIOQ_IDX_NULL;
959
960 if (!floating)
961 sd_event_ref(e);
962
963 LIST_PREPEND(sources, e->sources, s);
964 e->n_sources++;
965
966 return s;
967 }
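/* Note: when a caller passes ret == NULL to one of the sd_event_add_xyz()
 * constructors below, source_new() is called with floating=true, i.e. the
 * source is owned by the event loop itself rather than by a caller-held
 * reference, and is torn down together with the loop in event_free().
 * Non-floating sources conversely pin the loop via sd_event_ref(). */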
968
969 _public_ int sd_event_add_io(
970 sd_event *e,
971 sd_event_source **ret,
972 int fd,
973 uint32_t events,
974 sd_event_io_handler_t callback,
975 void *userdata) {
976
977 sd_event_source *s;
978 int r;
979
980 assert_return(e, -EINVAL);
981 assert_return(e = event_resolve(e), -ENOPKG);
982 assert_return(fd >= 0, -EBADF);
983 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
984 assert_return(callback, -EINVAL);
985 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
986 assert_return(!event_pid_changed(e), -ECHILD);
987
988 s = source_new(e, !ret, SOURCE_IO);
989 if (!s)
990 return -ENOMEM;
991
992 s->wakeup = WAKEUP_EVENT_SOURCE;
993 s->io.fd = fd;
994 s->io.events = events;
995 s->io.callback = callback;
996 s->userdata = userdata;
997 s->enabled = SD_EVENT_ON;
998
999 r = source_io_register(s, s->enabled, events);
1000 if (r < 0) {
1001 source_free(s);
1002 return r;
1003 }
1004
1005 if (ret)
1006 *ret = s;
1007
1008 return 0;
1009 }
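/* Usage sketch (illustrative only; "my_io_handler" and "fd" are hypothetical
 * caller-side names, not part of this file):
 *
 *         static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *                 // read from fd here; returning < 0 disables the source
 *                 return 0;
 *         }
 *
 *         sd_event *e = NULL;
 *         sd_event_source *src = NULL;
 *
 *         assert_se(sd_event_default(&e) >= 0);
 *         assert_se(sd_event_add_io(e, &src, fd, EPOLLIN, my_io_handler, NULL) >= 0);
 *         assert_se(sd_event_loop(e) >= 0);
 */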
1010
1011 static void initialize_perturb(sd_event *e) {
1012 sd_id128_t bootid = {};
1013
1014 /* When we sleep for longer, we try to realign the wakeup to
1015 the same time within each minute/second/250ms, so that
1016 events all across the system can be coalesced into a single
1017 CPU wakeup. However, let's take some system-specific
1018 randomness for this value, so that in a network of systems
1019 with synced clocks timer events are distributed a
1020 bit. Here, we calculate a perturbation usec offset from the
1021 boot ID. */
1022
1023 if (_likely_(e->perturb != USEC_INFINITY))
1024 return;
1025
1026 if (sd_id128_get_boot(&bootid) >= 0)
1027 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1028 }
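/* Worked example (illustrative): if the boot ID hashes to a perturb of
 * 700ms, a timer that may fire anywhere within a given minute gets aligned
 * to the :00.7 mark of that minute on this machine; a machine with a
 * different boot ID aligns to a different spot, so wakeups coalesce within
 * one system but are spread out across a fleet of systems with synced
 * clocks. */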
1029
1030 static int event_setup_timer_fd(
1031 sd_event *e,
1032 struct clock_data *d,
1033 clockid_t clock) {
1034
1035 struct epoll_event ev = {};
1036 int r, fd;
1037
1038 assert(e);
1039 assert(d);
1040
1041 if (_likely_(d->fd >= 0))
1042 return 0;
1043
1044 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1045 if (fd < 0)
1046 return -errno;
1047
1048 ev.events = EPOLLIN;
1049 ev.data.ptr = d;
1050
1051 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1052 if (r < 0) {
1053 safe_close(fd);
1054 return -errno;
1055 }
1056
1057 d->fd = fd;
1058 return 0;
1059 }
1060
1061 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1062 assert(s);
1063
1064 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1065 }
1066
1067 _public_ int sd_event_add_time(
1068 sd_event *e,
1069 sd_event_source **ret,
1070 clockid_t clock,
1071 uint64_t usec,
1072 uint64_t accuracy,
1073 sd_event_time_handler_t callback,
1074 void *userdata) {
1075
1076 EventSourceType type;
1077 sd_event_source *s;
1078 struct clock_data *d;
1079 int r;
1080
1081 assert_return(e, -EINVAL);
1082 assert_return(e = event_resolve(e), -ENOPKG);
1083 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1084 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1085 assert_return(!event_pid_changed(e), -ECHILD);
1086
1087 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1088 return -EOPNOTSUPP;
1089
1090 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1091 if (type < 0)
1092 return -EOPNOTSUPP;
1093
1094 if (!callback)
1095 callback = time_exit_callback;
1096
1097 d = event_get_clock_data(e, type);
1098 assert(d);
1099
1100 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1101 if (r < 0)
1102 return r;
1103
1104 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1105 if (r < 0)
1106 return r;
1107
1108 if (d->fd < 0) {
1109 r = event_setup_timer_fd(e, d, clock);
1110 if (r < 0)
1111 return r;
1112 }
1113
1114 s = source_new(e, !ret, type);
1115 if (!s)
1116 return -ENOMEM;
1117
1118 s->time.next = usec;
1119 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1120 s->time.callback = callback;
1121 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1122 s->userdata = userdata;
1123 s->enabled = SD_EVENT_ONESHOT;
1124
1125 d->needs_rearm = true;
1126
1127 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1128 if (r < 0)
1129 goto fail;
1130
1131 r = prioq_put(d->latest, s, &s->time.latest_index);
1132 if (r < 0)
1133 goto fail;
1134
1135 if (ret)
1136 *ret = s;
1137
1138 return 0;
1139
1140 fail:
1141 source_free(s);
1142 return r;
1143 }
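/* Usage sketch (illustrative; "my_time_handler" is a hypothetical caller-side
 * function): the usec parameter is an absolute time on the given clock, so a
 * relative timeout is armed by adding to "now". Passing 0 as accuracy selects
 * DEFAULT_ACCURACY_USEC, and passing NULL as ret makes the source floating:
 *
 *         uint64_t usec;
 *
 *         assert_se(sd_event_now(e, CLOCK_MONOTONIC, &usec) >= 0);
 *         assert_se(sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                     usec + 5 * USEC_PER_SEC, 0,
 *                                     my_time_handler, NULL) >= 0);
 */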
1144
1145 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1146 assert(s);
1147
1148 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1149 }
1150
1151 _public_ int sd_event_add_signal(
1152 sd_event *e,
1153 sd_event_source **ret,
1154 int sig,
1155 sd_event_signal_handler_t callback,
1156 void *userdata) {
1157
1158 sd_event_source *s;
1159 struct signal_data *d;
1160 sigset_t ss;
1161 int r;
1162
1163 assert_return(e, -EINVAL);
1164 assert_return(e = event_resolve(e), -ENOPKG);
1165 assert_return(SIGNAL_VALID(sig), -EINVAL);
1166 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1167 assert_return(!event_pid_changed(e), -ECHILD);
1168
1169 if (!callback)
1170 callback = signal_exit_callback;
1171
1172 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1173 if (r != 0)
1174 return -r;
1175
1176 if (!sigismember(&ss, sig))
1177 return -EBUSY;
1178
1179 if (!e->signal_sources) {
1180 e->signal_sources = new0(sd_event_source*, _NSIG);
1181 if (!e->signal_sources)
1182 return -ENOMEM;
1183 } else if (e->signal_sources[sig])
1184 return -EBUSY;
1185
1186 s = source_new(e, !ret, SOURCE_SIGNAL);
1187 if (!s)
1188 return -ENOMEM;
1189
1190 s->signal.sig = sig;
1191 s->signal.callback = callback;
1192 s->userdata = userdata;
1193 s->enabled = SD_EVENT_ON;
1194
1195 e->signal_sources[sig] = s;
1196
1197 r = event_make_signal_data(e, sig, &d);
1198 if (r < 0) {
1199 source_free(s);
1200 return r;
1201 }
1202
1203 /* Use the signal name as description for the event source by default */
1204 (void) sd_event_source_set_description(s, signal_to_string(sig));
1205
1206 if (ret)
1207 *ret = s;
1208
1209 return 0;
1210 }
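/* Usage sketch (illustrative): the signal has to be blocked in all threads
 * already, otherwise -EBUSY is returned above; passing a NULL callback
 * installs signal_exit_callback(), so that receiving the signal simply exits
 * the loop:
 *
 *         assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, SIGINT, -1) >= 0);
 *         assert_se(sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL) >= 0);
 *         assert_se(sd_event_add_signal(e, NULL, SIGINT, NULL, NULL) >= 0);
 */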
1211
1212 _public_ int sd_event_add_child(
1213 sd_event *e,
1214 sd_event_source **ret,
1215 pid_t pid,
1216 int options,
1217 sd_event_child_handler_t callback,
1218 void *userdata) {
1219
1220 sd_event_source *s;
1221 int r;
1222
1223 assert_return(e, -EINVAL);
1224 assert_return(e = event_resolve(e), -ENOPKG);
1225 assert_return(pid > 1, -EINVAL);
1226 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1227 assert_return(options != 0, -EINVAL);
1228 assert_return(callback, -EINVAL);
1229 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1230 assert_return(!event_pid_changed(e), -ECHILD);
1231
1232 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1233 if (r < 0)
1234 return r;
1235
1236 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1237 return -EBUSY;
1238
1239 s = source_new(e, !ret, SOURCE_CHILD);
1240 if (!s)
1241 return -ENOMEM;
1242
1243 s->child.pid = pid;
1244 s->child.options = options;
1245 s->child.callback = callback;
1246 s->userdata = userdata;
1247 s->enabled = SD_EVENT_ONESHOT;
1248
1249 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1250 if (r < 0) {
1251 source_free(s);
1252 return r;
1253 }
1254
1255 e->n_enabled_child_sources++;
1256
1257 r = event_make_signal_data(e, SIGCHLD, NULL);
1258 if (r < 0) {
1259 e->n_enabled_child_sources--;
1260 source_free(s);
1261 return r;
1262 }
1263
1264 e->need_process_child = true;
1265
1266 if (ret)
1267 *ret = s;
1268
1269 return 0;
1270 }
1271
1272 _public_ int sd_event_add_defer(
1273 sd_event *e,
1274 sd_event_source **ret,
1275 sd_event_handler_t callback,
1276 void *userdata) {
1277
1278 sd_event_source *s;
1279 int r;
1280
1281 assert_return(e, -EINVAL);
1282 assert_return(e = event_resolve(e), -ENOPKG);
1283 assert_return(callback, -EINVAL);
1284 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1285 assert_return(!event_pid_changed(e), -ECHILD);
1286
1287 s = source_new(e, !ret, SOURCE_DEFER);
1288 if (!s)
1289 return -ENOMEM;
1290
1291 s->defer.callback = callback;
1292 s->userdata = userdata;
1293 s->enabled = SD_EVENT_ONESHOT;
1294
1295 r = source_set_pending(s, true);
1296 if (r < 0) {
1297 source_free(s);
1298 return r;
1299 }
1300
1301 if (ret)
1302 *ret = s;
1303
1304 return 0;
1305 }
1306
1307 _public_ int sd_event_add_post(
1308 sd_event *e,
1309 sd_event_source **ret,
1310 sd_event_handler_t callback,
1311 void *userdata) {
1312
1313 sd_event_source *s;
1314 int r;
1315
1316 assert_return(e, -EINVAL);
1317 assert_return(e = event_resolve(e), -ENOPKG);
1318 assert_return(callback, -EINVAL);
1319 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1320 assert_return(!event_pid_changed(e), -ECHILD);
1321
1322 r = set_ensure_allocated(&e->post_sources, NULL);
1323 if (r < 0)
1324 return r;
1325
1326 s = source_new(e, !ret, SOURCE_POST);
1327 if (!s)
1328 return -ENOMEM;
1329
1330 s->post.callback = callback;
1331 s->userdata = userdata;
1332 s->enabled = SD_EVENT_ON;
1333
1334 r = set_put(e->post_sources, s);
1335 if (r < 0) {
1336 source_free(s);
1337 return r;
1338 }
1339
1340 if (ret)
1341 *ret = s;
1342
1343 return 0;
1344 }
1345
1346 _public_ int sd_event_add_exit(
1347 sd_event *e,
1348 sd_event_source **ret,
1349 sd_event_handler_t callback,
1350 void *userdata) {
1351
1352 sd_event_source *s;
1353 int r;
1354
1355 assert_return(e, -EINVAL);
1356 assert_return(e = event_resolve(e), -ENOPKG);
1357 assert_return(callback, -EINVAL);
1358 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1359 assert_return(!event_pid_changed(e), -ECHILD);
1360
1361 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1362 if (r < 0)
1363 return r;
1364
1365 s = source_new(e, !ret, SOURCE_EXIT);
1366 if (!s)
1367 return -ENOMEM;
1368
1369 s->exit.callback = callback;
1370 s->userdata = userdata;
1371 s->exit.prioq_index = PRIOQ_IDX_NULL;
1372 s->enabled = SD_EVENT_ONESHOT;
1373
1374 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1375 if (r < 0) {
1376 source_free(s);
1377 return r;
1378 }
1379
1380 if (ret)
1381 *ret = s;
1382
1383 return 0;
1384 }
1385
1386 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1387
1388 if (!s)
1389 return NULL;
1390
1391 assert(s->n_ref >= 1);
1392 s->n_ref++;
1393
1394 return s;
1395 }
1396
1397 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1398
1399 if (!s)
1400 return NULL;
1401
1402 assert(s->n_ref >= 1);
1403 s->n_ref--;
1404
1405 if (s->n_ref <= 0) {
1406 /* Here's a special hack: when we are called from a
1407 * dispatch handler we won't free the event source
1408 * immediately, but we will detach the fd from the
1409 * epoll. This way it is safe for the caller to unref
1410 * the event source and immediately close the fd, but
1411 * we still retain a valid event source object after
1412 * the callback. */
1413
1414 if (s->dispatching) {
1415 if (s->type == SOURCE_IO)
1416 source_io_unregister(s);
1417
1418 source_disconnect(s);
1419 } else
1420 source_free(s);
1421 }
1422
1423 return NULL;
1424 }
1425
1426 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1427 assert_return(s, -EINVAL);
1428 assert_return(!event_pid_changed(s->event), -ECHILD);
1429
1430 return free_and_strdup(&s->description, description);
1431 }
1432
1433 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1434 assert_return(s, -EINVAL);
1435 assert_return(description, -EINVAL);
1436 assert_return(s->description, -ENXIO);
1437 assert_return(!event_pid_changed(s->event), -ECHILD);
1438
1439 *description = s->description;
1440 return 0;
1441 }
1442
1443 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1444 assert_return(s, NULL);
1445
1446 return s->event;
1447 }
1448
1449 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1450 assert_return(s, -EINVAL);
1451 assert_return(s->type != SOURCE_EXIT, -EDOM);
1452 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1453 assert_return(!event_pid_changed(s->event), -ECHILD);
1454
1455 return s->pending;
1456 }
1457
1458 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1459 assert_return(s, -EINVAL);
1460 assert_return(s->type == SOURCE_IO, -EDOM);
1461 assert_return(!event_pid_changed(s->event), -ECHILD);
1462
1463 return s->io.fd;
1464 }
1465
1466 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1467 int r;
1468
1469 assert_return(s, -EINVAL);
1470 assert_return(fd >= 0, -EBADF);
1471 assert_return(s->type == SOURCE_IO, -EDOM);
1472 assert_return(!event_pid_changed(s->event), -ECHILD);
1473
1474 if (s->io.fd == fd)
1475 return 0;
1476
1477 if (s->enabled == SD_EVENT_OFF) {
1478 s->io.fd = fd;
1479 s->io.registered = false;
1480 } else {
1481 int saved_fd;
1482
1483 saved_fd = s->io.fd;
1484 assert(s->io.registered);
1485
1486 s->io.fd = fd;
1487 s->io.registered = false;
1488
1489 r = source_io_register(s, s->enabled, s->io.events);
1490 if (r < 0) {
1491 s->io.fd = saved_fd;
1492 s->io.registered = true;
1493 return r;
1494 }
1495
1496 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1497 }
1498
1499 return 0;
1500 }
1501
1502 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1503 assert_return(s, -EINVAL);
1504 assert_return(s->type == SOURCE_IO, -EDOM);
1505
1506 return s->io.owned;
1507 }
1508
1509 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1510 assert_return(s, -EINVAL);
1511 assert_return(s->type == SOURCE_IO, -EDOM);
1512
1513 s->io.owned = own;
1514 return 0;
1515 }
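/* Usage sketch (illustrative): pass fd ownership to the event source, so
 * that the fd is closed automatically when the source is freed and the
 * caller does not need to track it separately:
 *
 *         assert_se(sd_event_add_io(e, &src, fd, EPOLLIN, my_io_handler, NULL) >= 0);
 *         assert_se(sd_event_source_set_io_fd_own(src, true) >= 0);
 *         ...
 *         sd_event_source_unref(src);  // also closes fd, see source_free()
 */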
1516
1517 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1518 assert_return(s, -EINVAL);
1519 assert_return(events, -EINVAL);
1520 assert_return(s->type == SOURCE_IO, -EDOM);
1521 assert_return(!event_pid_changed(s->event), -ECHILD);
1522
1523 *events = s->io.events;
1524 return 0;
1525 }
1526
1527 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1528 int r;
1529
1530 assert_return(s, -EINVAL);
1531 assert_return(s->type == SOURCE_IO, -EDOM);
1532 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1533 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1534 assert_return(!event_pid_changed(s->event), -ECHILD);
1535
1536 /* edge-triggered updates are never skipped, so we can reset edges */
1537 if (s->io.events == events && !(events & EPOLLET))
1538 return 0;
1539
1540 if (s->enabled != SD_EVENT_OFF) {
1541 r = source_io_register(s, s->enabled, events);
1542 if (r < 0)
1543 return r;
1544 }
1545
1546 s->io.events = events;
1547 source_set_pending(s, false);
1548
1549 return 0;
1550 }
1551
1552 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1553 assert_return(s, -EINVAL);
1554 assert_return(revents, -EINVAL);
1555 assert_return(s->type == SOURCE_IO, -EDOM);
1556 assert_return(s->pending, -ENODATA);
1557 assert_return(!event_pid_changed(s->event), -ECHILD);
1558
1559 *revents = s->io.revents;
1560 return 0;
1561 }
1562
1563 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1564 assert_return(s, -EINVAL);
1565 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1566 assert_return(!event_pid_changed(s->event), -ECHILD);
1567
1568 return s->signal.sig;
1569 }
1570
1571 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1572 assert_return(s, -EINVAL);
1573 assert_return(!event_pid_changed(s->event), -ECHILD);
1574
1575 *priority = s->priority;
1576 return 0;
1577 }
1578
1579 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1580 int r;
1581
1582 assert_return(s, -EINVAL);
1583 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1584 assert_return(!event_pid_changed(s->event), -ECHILD);
1585
1586 if (s->priority == priority)
1587 return 0;
1588
1589 if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1590 struct signal_data *old, *d;
1591
1592 /* Move us from the signalfd belonging to the old
1593 * priority to the signalfd of the new priority */
1594
1595 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1596
1597 s->priority = priority;
1598
1599 r = event_make_signal_data(s->event, s->signal.sig, &d);
1600 if (r < 0) {
1601 s->priority = old->priority;
1602 return r;
1603 }
1604
1605 event_unmask_signal_data(s->event, old, s->signal.sig);
1606 } else
1607 s->priority = priority;
1608
1609 if (s->pending)
1610 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1611
1612 if (s->prepare)
1613 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1614
1615 if (s->type == SOURCE_EXIT)
1616 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1617
1618 return 0;
1619 }
1620
1621 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1622 assert_return(s, -EINVAL);
1623 assert_return(m, -EINVAL);
1624 assert_return(!event_pid_changed(s->event), -ECHILD);
1625
1626 *m = s->enabled;
1627 return 0;
1628 }
1629
1630 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1631 int r;
1632
1633 assert_return(s, -EINVAL);
1634 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1635 assert_return(!event_pid_changed(s->event), -ECHILD);
1636
1637 /* If we are dead anyway, we are fine with turning off
1638 * sources, but everything else needs to fail. */
1639 if (s->event->state == SD_EVENT_FINISHED)
1640 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1641
1642 if (s->enabled == m)
1643 return 0;
1644
1645 if (m == SD_EVENT_OFF) {
1646
1647 switch (s->type) {
1648
1649 case SOURCE_IO:
1650 source_io_unregister(s);
1651 s->enabled = m;
1652 break;
1653
1654 case SOURCE_TIME_REALTIME:
1655 case SOURCE_TIME_BOOTTIME:
1656 case SOURCE_TIME_MONOTONIC:
1657 case SOURCE_TIME_REALTIME_ALARM:
1658 case SOURCE_TIME_BOOTTIME_ALARM: {
1659 struct clock_data *d;
1660
1661 s->enabled = m;
1662 d = event_get_clock_data(s->event, s->type);
1663 assert(d);
1664
1665 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1666 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1667 d->needs_rearm = true;
1668 break;
1669 }
1670
1671 case SOURCE_SIGNAL:
1672 s->enabled = m;
1673
1674 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1675 break;
1676
1677 case SOURCE_CHILD:
1678 s->enabled = m;
1679
1680 assert(s->event->n_enabled_child_sources > 0);
1681 s->event->n_enabled_child_sources--;
1682
1683 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1684 break;
1685
1686 case SOURCE_EXIT:
1687 s->enabled = m;
1688 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1689 break;
1690
1691 case SOURCE_DEFER:
1692 case SOURCE_POST:
1693 s->enabled = m;
1694 break;
1695
1696 default:
1697 assert_not_reached("Wut? I shouldn't exist.");
1698 }
1699
1700 } else {
1701 switch (s->type) {
1702
1703 case SOURCE_IO:
1704 r = source_io_register(s, m, s->io.events);
1705 if (r < 0)
1706 return r;
1707
1708 s->enabled = m;
1709 break;
1710
1711 case SOURCE_TIME_REALTIME:
1712 case SOURCE_TIME_BOOTTIME:
1713 case SOURCE_TIME_MONOTONIC:
1714 case SOURCE_TIME_REALTIME_ALARM:
1715 case SOURCE_TIME_BOOTTIME_ALARM: {
1716 struct clock_data *d;
1717
1718 s->enabled = m;
1719 d = event_get_clock_data(s->event, s->type);
1720 assert(d);
1721
1722 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1723 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1724 d->needs_rearm = true;
1725 break;
1726 }
1727
1728 case SOURCE_SIGNAL:
1729
1730 s->enabled = m;
1731
1732 r = event_make_signal_data(s->event, s->signal.sig, NULL);
1733 if (r < 0) {
1734 s->enabled = SD_EVENT_OFF;
1735 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
1736 return r;
1737 }
1738
1739 break;
1740
1741 case SOURCE_CHILD:
1742
1743 if (s->enabled == SD_EVENT_OFF)
1744 s->event->n_enabled_child_sources++;
1745
1746 s->enabled = m;
1747
1748 r = event_make_signal_data(s->event, SIGCHLD, NULL);
1749 if (r < 0) {
1750 s->enabled = SD_EVENT_OFF;
1751 s->event->n_enabled_child_sources--;
1752 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
1753 return r;
1754 }
1755
1756 break;
1757
1758 case SOURCE_EXIT:
1759 s->enabled = m;
1760 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1761 break;
1762
1763 case SOURCE_DEFER:
1764 case SOURCE_POST:
1765 s->enabled = m;
1766 break;
1767
1768 default:
1769 assert_not_reached("Wut? I shouldn't exist.");
1770 }
1771 }
1772
1773 if (s->pending)
1774 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1775
1776 if (s->prepare)
1777 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1778
1779 return 0;
1780 }
1781
1782 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
1783 assert_return(s, -EINVAL);
1784 assert_return(usec, -EINVAL);
1785 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1786 assert_return(!event_pid_changed(s->event), -ECHILD);
1787
1788 *usec = s->time.next;
1789 return 0;
1790 }
1791
1792 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
1793 struct clock_data *d;
1794
1795 assert_return(s, -EINVAL);
1796 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1797 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1798 assert_return(!event_pid_changed(s->event), -ECHILD);
1799
1800 s->time.next = usec;
1801
1802 source_set_pending(s, false);
1803
1804 d = event_get_clock_data(s->event, s->type);
1805 assert(d);
1806
1807 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1808 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1809 d->needs_rearm = true;
1810
1811 return 0;
1812 }
1813
1814 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
1815 assert_return(s, -EINVAL);
1816 assert_return(usec, -EINVAL);
1817 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1818 assert_return(!event_pid_changed(s->event), -ECHILD);
1819
1820 *usec = s->time.accuracy;
1821 return 0;
1822 }
1823
1824 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
1825 struct clock_data *d;
1826
1827 assert_return(s, -EINVAL);
1828 assert_return(usec != (uint64_t) -1, -EINVAL);
1829 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1830 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1831 assert_return(!event_pid_changed(s->event), -ECHILD);
1832
1833 if (usec == 0)
1834 usec = DEFAULT_ACCURACY_USEC;
1835
1836 s->time.accuracy = usec;
1837
1838 source_set_pending(s, false);
1839
1840 d = event_get_clock_data(s->event, s->type);
1841 assert(d);
1842
1843 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1844 d->needs_rearm = true;
1845
1846 return 0;
1847 }
1848
1849 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
1850 assert_return(s, -EINVAL);
1851 assert_return(clock, -EINVAL);
1852 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
1853 assert_return(!event_pid_changed(s->event), -ECHILD);
1854
1855 *clock = event_source_type_to_clock(s->type);
1856 return 0;
1857 }
1858
1859 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
1860 assert_return(s, -EINVAL);
1861 assert_return(pid, -EINVAL);
1862 assert_return(s->type == SOURCE_CHILD, -EDOM);
1863 assert_return(!event_pid_changed(s->event), -ECHILD);
1864
1865 *pid = s->child.pid;
1866 return 0;
1867 }
1868
1869 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
1870 int r;
1871
1872 assert_return(s, -EINVAL);
1873 assert_return(s->type != SOURCE_EXIT, -EDOM);
1874 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1875 assert_return(!event_pid_changed(s->event), -ECHILD);
1876
1877 if (s->prepare == callback)
1878 return 0;
1879
1880 if (callback && s->prepare) {
1881 s->prepare = callback;
1882 return 0;
1883 }
1884
1885 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
1886 if (r < 0)
1887 return r;
1888
1889 s->prepare = callback;
1890
1891 if (callback) {
1892 r = prioq_put(s->event->prepare, s, &s->prepare_index);
1893 if (r < 0)
1894 return r;
1895 } else
1896 prioq_remove(s->event->prepare, s, &s->prepare_index);
1897
1898 return 0;
1899 }
1900
1901 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
1902 assert_return(s, NULL);
1903
1904 return s->userdata;
1905 }
1906
1907 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
1908 void *ret;
1909
1910 assert_return(s, NULL);
1911
1912 ret = s->userdata;
1913 s->userdata = userdata;
1914
1915 return ret;
1916 }
1917
1918 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
1919 usec_t c;
1920 assert(e);
1921 assert(a <= b);
1922
1923 if (a <= 0)
1924 return 0;
1925 if (a >= USEC_INFINITY)
1926 return USEC_INFINITY;
1927
1928 if (b <= a + 1)
1929 return a;
1930
1931 initialize_perturb(e);
1932
1933 /*
1934 Find a good time to wake up again between times a and b. We
1935 have two goals here:
1936
1937 a) We want to wake up as seldom as possible, hence prefer
1938 later times over earlier times.
1939
1940 b) But if we have to wake up, then let's make sure to
1941 dispatch as much as possible on the entire system.
1942
1943 We implement this by waking up everywhere at the same time
1944 within any given minute if we can, synchronised via the
1945 perturbation value determined from the boot ID. If we can't,
1946 then we try to find the same spot in every 10s, then 1s and
1947 then 250ms step. Otherwise, we pick the last possible time
1948 to wake up.
1949 */
1950
1951 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
1952 if (c >= b) {
1953 if (_unlikely_(c < USEC_PER_MINUTE))
1954 return b;
1955
1956 c -= USEC_PER_MINUTE;
1957 }
1958
1959 if (c >= a)
1960 return c;
1961
1962 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
1963 if (c >= b) {
1964 if (_unlikely_(c < USEC_PER_SEC*10))
1965 return b;
1966
1967 c -= USEC_PER_SEC*10;
1968 }
1969
1970 if (c >= a)
1971 return c;
1972
1973 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
1974 if (c >= b) {
1975 if (_unlikely_(c < USEC_PER_SEC))
1976 return b;
1977
1978 c -= USEC_PER_SEC;
1979 }
1980
1981 if (c >= a)
1982 return c;
1983
1984 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
1985 if (c >= b) {
1986 if (_unlikely_(c < USEC_PER_MSEC*250))
1987 return b;
1988
1989 c -= USEC_PER_MSEC*250;
1990 }
1991
1992 if (c >= a)
1993 return c;
1994
1995 return b;
1996 }
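/* Worked example (illustrative): with a = 30.2s, b = 45.7s and a perturb of
 * 0.5s, the minute-aligned candidate is 0.5s, which lies before a, so we fall
 * through; the 10s-aligned candidate is 40.5s, which lies within [a, b], so
 * 40.5s is returned, and every timer on this machine whose window covers that
 * spot wakes up at the same instant. */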
1997
1998 static int event_arm_timer(
1999 sd_event *e,
2000 struct clock_data *d) {
2001
2002 struct itimerspec its = {};
2003 sd_event_source *a, *b;
2004 usec_t t;
2005 int r;
2006
2007 assert(e);
2008 assert(d);
2009
2010 if (!d->needs_rearm)
2011 return 0;
2012 else
2013 d->needs_rearm = false;
2014
2015 a = prioq_peek(d->earliest);
2016 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2017
2018 if (d->fd < 0)
2019 return 0;
2020
2021 if (d->next == USEC_INFINITY)
2022 return 0;
2023
2024 /* disarm */
2025 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2026 if (r < 0)
2027 return r;
2028
2029 d->next = USEC_INFINITY;
2030 return 0;
2031 }
2032
2033 b = prioq_peek(d->latest);
2034 assert_se(b && b->enabled != SD_EVENT_OFF);
2035
2036 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2037 if (d->next == t)
2038 return 0;
2039
2040 assert_se(d->fd >= 0);
2041
2042 if (t == 0) {
2043 /* We don't want to disarm here, just arm the timer for some time looooong ago. */
2044 its.it_value.tv_sec = 0;
2045 its.it_value.tv_nsec = 1;
2046 } else
2047 timespec_store(&its.it_value, t);
2048
2049 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2050 if (r < 0)
2051 return -errno;
2052
2053 d->next = t;
2054 return 0;
2055 }
2056
2057 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2058 assert(e);
2059 assert(s);
2060 assert(s->type == SOURCE_IO);
2061
2062 /* If the event source was already pending, we just OR in the
2063 * new revents, otherwise we reset the value. The ORing is
2064 * necessary to handle EPOLLONESHOT events properly where
2065 * readability might happen independently of writability, and
2066 * we need to keep track of both */
2067
2068 if (s->pending)
2069 s->io.revents |= revents;
2070 else
2071 s->io.revents = revents;
2072
2073 return source_set_pending(s, true);
2074 }
2075
2076 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2077 uint64_t x;
2078 ssize_t ss;
2079
2080 assert(e);
2081 assert(fd >= 0);
2082
2083 assert_return(events == EPOLLIN, -EIO);
2084
2085 ss = read(fd, &x, sizeof(x));
2086 if (ss < 0) {
2087 if (IN_SET(errno, EAGAIN, EINTR))
2088 return 0;
2089
2090 return -errno;
2091 }
2092
2093 if (_unlikely_(ss != sizeof(x)))
2094 return -EIO;
2095
2096 if (next)
2097 *next = USEC_INFINITY;
2098
2099 return 0;
2100 }
2101
2102 static int process_timer(
2103 sd_event *e,
2104 usec_t n,
2105 struct clock_data *d) {
2106
2107 sd_event_source *s;
2108 int r;
2109
2110 assert(e);
2111 assert(d);
2112
2113 for (;;) {
2114 s = prioq_peek(d->earliest);
2115 if (!s ||
2116 s->time.next > n ||
2117 s->enabled == SD_EVENT_OFF ||
2118 s->pending)
2119 break;
2120
2121 r = source_set_pending(s, true);
2122 if (r < 0)
2123 return r;
2124
2125 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2126 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2127 d->needs_rearm = true;
2128 }
2129
2130 return 0;
2131 }
2132
2133 static int process_child(sd_event *e) {
2134 sd_event_source *s;
2135 Iterator i;
2136 int r;
2137
2138 assert(e);
2139
2140 e->need_process_child = false;
2141
2142 /*
2143 So, this is ugly. We iteratively invoke waitid() with P_PID
2144 + WNOHANG for each PID we wait for, instead of using
2145 P_ALL. This is because we only want to get child
2146 information of very specific child processes, and not all
2147 of them. We might not have processed the SIGCHLD event of a
2148 previous invocation and we don't want to maintain an
2149 unbounded *per-child* event queue, hence we really don't
2150 want anything flushed out of the kernel's queue that we
2151 don't care about. Since this is O(n) this means that if you
2152 have a lot of processes you probably want to handle SIGCHLD
2153 yourself.
2154
2155 We do not reap the children here (hence WNOWAIT); that is
2156 only done after the event source is dispatched, so that
2157 the callback still sees the process as a zombie.
2158 */
2159
2160 HASHMAP_FOREACH(s, e->child_sources, i) {
2161 assert(s->type == SOURCE_CHILD);
2162
2163 if (s->pending)
2164 continue;
2165
2166 if (s->enabled == SD_EVENT_OFF)
2167 continue;
2168
2169 zero(s->child.siginfo);
2170 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2171 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2172 if (r < 0)
2173 return -errno;
2174
2175 if (s->child.siginfo.si_pid != 0) {
2176 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2177
2178 if (!zombie && (s->child.options & WEXITED)) {
2179 /* If the child isn't dead then let's
2180 * immediately remove the state change
2181 * from the queue, since there's no
2182 * benefit in leaving it queued */
2183
2184 assert(s->child.options & (WSTOPPED|WCONTINUED));
2185 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2186 }
2187
2188 r = source_set_pending(s, true);
2189 if (r < 0)
2190 return r;
2191 }
2192 }
2193
2194 return 0;
2195 }
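/* Note: because of WNOWAIT above, an exited child remains a zombie while its
 * callback runs; the final, reaping waitid() happens in source_dispatch()
 * below, only after the callback has had a chance to inspect the siginfo. */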
2196
2197 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2198 bool read_one = false;
2199 int r;
2200
2201 assert(e);
2202 assert_return(events == EPOLLIN, -EIO);
2203
2204 /* If there's a signal queued on this priority and SIGCHLD is
2205 on this priority too, then make sure to recheck the
2206 children we watch. This is because we only ever dequeue
2207 the first signal per priority: if we dequeue some other
2208 signal here, a SIGCHLD queued up behind it would go
2209 unnoticed, even though we might care about the children it
2210 reports on, hence we need to check them explicitly. */
2211
2212 if (sigismember(&d->sigset, SIGCHLD))
2213 e->need_process_child = true;
2214
2215 /* If there's already an event source pending for this
2216 * priority we don't read another */
2217 if (d->current)
2218 return 0;
2219
2220 for (;;) {
2221 struct signalfd_siginfo si;
2222 ssize_t n;
2223 sd_event_source *s = NULL;
2224
2225 n = read(d->fd, &si, sizeof(si));
2226 if (n < 0) {
2227 if (IN_SET(errno, EAGAIN, EINTR))
2228 return read_one;
2229
2230 return -errno;
2231 }
2232
2233 if (_unlikely_(n != sizeof(si)))
2234 return -EIO;
2235
2236 assert(SIGNAL_VALID(si.ssi_signo));
2237
2238 read_one = true;
2239
2240 if (e->signal_sources)
2241 s = e->signal_sources[si.ssi_signo];
2242 if (!s)
2243 continue;
2244 if (s->pending)
2245 continue;
2246
2247 s->signal.siginfo = si;
2248 d->current = s;
2249
2250 r = source_set_pending(s, true);
2251 if (r < 0)
2252 return r;
2253
2254 return 1;
2255 }
2256 }
2257
2258 static int source_dispatch(sd_event_source *s) {
2259 EventSourceType saved_type;
2260 int r = 0;
2261
2262 assert(s);
2263 assert(s->pending || s->type == SOURCE_EXIT);
2264
2265 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2266 * the event source. */
2267 saved_type = s->type;
2268
2269 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2270 r = source_set_pending(s, false);
2271 if (r < 0)
2272 return r;
2273 }
2274
2275 if (s->type != SOURCE_POST) {
2276 sd_event_source *z;
2277 Iterator i;
2278
2279 /* If we execute a non-post source, let's mark all
2280 * post sources as pending */
2281
2282 SET_FOREACH(z, s->event->post_sources, i) {
2283 if (z->enabled == SD_EVENT_OFF)
2284 continue;
2285
2286 r = source_set_pending(z, true);
2287 if (r < 0)
2288 return r;
2289 }
2290 }
2291
2292 if (s->enabled == SD_EVENT_ONESHOT) {
2293 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2294 if (r < 0)
2295 return r;
2296 }
2297
2298 s->dispatching = true;
2299
2300 switch (s->type) {
2301
2302 case SOURCE_IO:
2303 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2304 break;
2305
2306 case SOURCE_TIME_REALTIME:
2307 case SOURCE_TIME_BOOTTIME:
2308 case SOURCE_TIME_MONOTONIC:
2309 case SOURCE_TIME_REALTIME_ALARM:
2310 case SOURCE_TIME_BOOTTIME_ALARM:
2311 r = s->time.callback(s, s->time.next, s->userdata);
2312 break;
2313
2314 case SOURCE_SIGNAL:
2315 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2316 break;
2317
2318 case SOURCE_CHILD: {
2319 bool zombie;
2320
2321 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2322
2323 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2324
2325 /* Now, reap the PID for good. */
2326 if (zombie)
2327 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2328
2329 break;
2330 }
2331
2332 case SOURCE_DEFER:
2333 r = s->defer.callback(s, s->userdata);
2334 break;
2335
2336 case SOURCE_POST:
2337 r = s->post.callback(s, s->userdata);
2338 break;
2339
2340 case SOURCE_EXIT:
2341 r = s->exit.callback(s, s->userdata);
2342 break;
2343
2344 case SOURCE_WATCHDOG:
2345 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2346 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2347 assert_not_reached("Wut? I shouldn't exist.");
2348 }
2349
2350 s->dispatching = false;
2351
2352 if (r < 0)
2353 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2354 strna(s->description), event_source_type_to_string(saved_type));
2355
2356 if (s->n_ref == 0)
2357 source_free(s);
2358 else if (r < 0)
2359 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2360
2361 return 1;
2362 }
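
/* Illustrative sketch (hypothetical caller code): the SD_EVENT_ONESHOT
 * handling above disables a oneshot source before its callback runs, so it
 * fires exactly once unless the callback re-enables it. A defer source
 * pinned to oneshot:
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int run_once(void) {
 *             sd_event *e = NULL;
 *             sd_event_source *s = NULL;
 *             int r;
 *
 *             r = sd_event_default(&e);
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_add_defer(e, &s, on_defer, NULL);
 *             if (r >= 0) {
 *                     sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *                     r = sd_event_loop(e);
 *             }
 *
 *             sd_event_source_unref(s);
 *             sd_event_unref(e);
 *             return r;
 *     }
 */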
2363
2364 static int event_prepare(sd_event *e) {
2365 int r;
2366
2367 assert(e);
2368
2369 for (;;) {
2370 sd_event_source *s;
2371
2372 s = prioq_peek(e->prepare);
2373 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2374 break;
2375
2376 s->prepare_iteration = e->iteration;
2377 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2378 if (r < 0)
2379 return r;
2380
2381 assert(s->prepare);
2382
2383 s->dispatching = true;
2384 r = s->prepare(s, s->userdata);
2385 s->dispatching = false;
2386
2387 if (r < 0)
2388 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2389 strna(s->description), event_source_type_to_string(s->type));
2390
2391 if (s->n_ref == 0)
2392 source_free(s);
2393 else if (r < 0)
2394 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2395 }
2396
2397 return 0;
2398 }
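
/* Illustrative sketch (hypothetical caller code): a prepare callback, as
 * dispatched by event_prepare() above, runs once per iteration right before
 * the loop polls. That makes it a natural hook for flushing buffered state
 * from another subsystem into the source just in time:
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int on_prepare(sd_event_source *s, void *userdata) {
 *             // inspect some library's internal queue here and adjust the
 *             // source accordingly (enable/disable, rearm a timer, ...)
 *             return 0;
 *     }
 *
 *     // after creating a source s:
 *     //     sd_event_source_set_prepare(s, on_prepare);
 */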
2399
2400 static int dispatch_exit(sd_event *e) {
2401 sd_event_source *p;
2402 int r;
2403
2404 assert(e);
2405
2406 p = prioq_peek(e->exit);
2407 if (!p || p->enabled == SD_EVENT_OFF) {
2408 e->state = SD_EVENT_FINISHED;
2409 return 0;
2410 }
2411
2412 sd_event_ref(e);
2413 e->iteration++;
2414 e->state = SD_EVENT_EXITING;
2415
2416 r = source_dispatch(p);
2417
2418 e->state = SD_EVENT_INITIAL;
2419 sd_event_unref(e);
2420
2421 return r;
2422 }
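
/* Illustrative sketch (hypothetical caller code): exit sources, dispatched
 * by dispatch_exit() above, run only after sd_event_exit() was called,
 * which makes them useful for ordered teardown; sources with smaller
 * priority values are dispatched first.
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int on_shutdown(sd_event_source *s, void *userdata) {
 *             // release resources before sd_event_loop() returns
 *             return 0;
 *     }
 *
 *     // sd_event_add_exit(e, NULL, on_shutdown, NULL);
 *     // ... later, from any handler:
 *     //     sd_event_exit(e, 0);
 */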
2423
2424 static sd_event_source* event_next_pending(sd_event *e) {
2425 sd_event_source *p;
2426
2427 assert(e);
2428
2429 p = prioq_peek(e->pending);
2430 if (!p)
2431 return NULL;
2432
2433 if (p->enabled == SD_EVENT_OFF)
2434 return NULL;
2435
2436 return p;
2437 }
2438
2439 static int arm_watchdog(sd_event *e) {
2440 struct itimerspec its = {};
2441 usec_t t;
2442 int r;
2443
2444 assert(e);
2445 assert(e->watchdog_fd >= 0);
2446
2447 t = sleep_between(e,
2448 e->watchdog_last + (e->watchdog_period / 2),
2449 e->watchdog_last + (e->watchdog_period * 3 / 4));
2450
2451 timespec_store(&its.it_value, t);
2452
2453 /* Make sure we never set the watchdog to 0, which tells the
2454 * kernel to disable it. */
2455 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2456 its.it_value.tv_nsec = 1;
2457
2458 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2459 if (r < 0)
2460 return -errno;
2461
2462 return 0;
2463 }
2464
2465 static int process_watchdog(sd_event *e) {
2466 assert(e);
2467
2468 if (!e->watchdog)
2469 return 0;
2470
2471 /* Don't notify the watchdog too often */
2472 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
2473 return 0;
2474
2475 sd_notify(false, "WATCHDOG=1");
2476 e->watchdog_last = e->timestamp.monotonic;
2477
2478 return arm_watchdog(e);
2479 }
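
/* Worked example for the two functions above, assuming the service manager
 * set WATCHDOG_USEC=20000000 (20s): process_watchdog() pings at most once
 * per period/4 = 5s, and arm_watchdog() arms the timerfd to fire somewhere
 * in the [period/2, period*3/4] = [10s, 15s] window after the last ping.
 * The timer wakes an otherwise idle loop early enough to ping again well
 * before the 20s deadline, while the period/4 check keeps a busy loop from
 * pinging on every single iteration. */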
2480
2481 _public_ int sd_event_prepare(sd_event *e) {
2482 int r;
2483
2484 assert_return(e, -EINVAL);
2485 assert_return(e = event_resolve(e), -ENOPKG);
2486 assert_return(!event_pid_changed(e), -ECHILD);
2487 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2488 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2489
2490 if (e->exit_requested)
2491 goto pending;
2492
2493 e->iteration++;
2494
2495 e->state = SD_EVENT_PREPARING;
2496 r = event_prepare(e);
2497 e->state = SD_EVENT_INITIAL;
2498 if (r < 0)
2499 return r;
2500
2501 r = event_arm_timer(e, &e->realtime);
2502 if (r < 0)
2503 return r;
2504
2505 r = event_arm_timer(e, &e->boottime);
2506 if (r < 0)
2507 return r;
2508
2509 r = event_arm_timer(e, &e->monotonic);
2510 if (r < 0)
2511 return r;
2512
2513 r = event_arm_timer(e, &e->realtime_alarm);
2514 if (r < 0)
2515 return r;
2516
2517 r = event_arm_timer(e, &e->boottime_alarm);
2518 if (r < 0)
2519 return r;
2520
2521 if (event_next_pending(e) || e->need_process_child)
2522 goto pending;
2523
2524 e->state = SD_EVENT_ARMED;
2525
2526 return 0;
2527
2528 pending:
2529 e->state = SD_EVENT_ARMED;
2530 r = sd_event_wait(e, 0);
2531 if (r == 0)
2532 e->state = SD_EVENT_ARMED;
2533
2534 return r;
2535 }
2536
2537 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
2538 struct epoll_event *ev_queue;
2539 unsigned ev_queue_max;
2540 int r, m, i;
2541
2542 assert_return(e, -EINVAL);
2543 assert_return(e = event_resolve(e), -ENOPKG);
2544 assert_return(!event_pid_changed(e), -ECHILD);
2545 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2546 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
2547
2548 if (e->exit_requested) {
2549 e->state = SD_EVENT_PENDING;
2550 return 1;
2551 }
2552
2553 ev_queue_max = MAX(e->n_sources, 1u);
2554 ev_queue = newa(struct epoll_event, ev_queue_max);
2555
2556 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
2557 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
2558 if (m < 0) {
2559 if (errno == EINTR) {
2560 e->state = SD_EVENT_PENDING;
2561 return 1;
2562 }
2563
2564 r = -errno;
2565 goto finish;
2566 }
2567
2568 triple_timestamp_get(&e->timestamp);
2569
2570 for (i = 0; i < m; i++) {
2571
2572 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
2573 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
2574 else {
2575 WakeupType *t = ev_queue[i].data.ptr;
2576
2577 switch (*t) {
2578
2579 case WAKEUP_EVENT_SOURCE:
2580 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
2581 break;
2582
2583 case WAKEUP_CLOCK_DATA: {
2584 struct clock_data *d = ev_queue[i].data.ptr;
2585 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
2586 break;
2587 }
2588
2589 case WAKEUP_SIGNAL_DATA:
2590 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
2591 break;
2592
2593 default:
2594 assert_not_reached("Invalid wake-up pointer");
2595 }
2596 }
2597 if (r < 0)
2598 goto finish;
2599 }
2600
2601 r = process_watchdog(e);
2602 if (r < 0)
2603 goto finish;
2604
2605 r = process_timer(e, e->timestamp.realtime, &e->realtime);
2606 if (r < 0)
2607 goto finish;
2608
2609 r = process_timer(e, e->timestamp.boottime, &e->boottime);
2610 if (r < 0)
2611 goto finish;
2612
2613 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
2614 if (r < 0)
2615 goto finish;
2616
2617 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
2618 if (r < 0)
2619 goto finish;
2620
2621 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
2622 if (r < 0)
2623 goto finish;
2624
2625 if (e->need_process_child) {
2626 r = process_child(e);
2627 if (r < 0)
2628 goto finish;
2629 }
2630
2631 if (event_next_pending(e)) {
2632 e->state = SD_EVENT_PENDING;
2633
2634 return 1;
2635 }
2636
2637 r = 0;
2638
2639 finish:
2640 e->state = SD_EVENT_INITIAL;
2641
2642 return r;
2643 }
2644
2645 _public_ int sd_event_dispatch(sd_event *e) {
2646 sd_event_source *p;
2647 int r;
2648
2649 assert_return(e, -EINVAL);
2650 assert_return(e = event_resolve(e), -ENOPKG);
2651 assert_return(!event_pid_changed(e), -ECHILD);
2652 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2653 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
2654
2655 if (e->exit_requested)
2656 return dispatch_exit(e);
2657
2658 p = event_next_pending(e);
2659 if (p) {
2660 sd_event_ref(e);
2661
2662 e->state = SD_EVENT_RUNNING;
2663 r = source_dispatch(p);
2664 e->state = SD_EVENT_INITIAL;
2665
2666 sd_event_unref(e);
2667
2668 return r;
2669 }
2670
2671 e->state = SD_EVENT_INITIAL;
2672
2673 return 1;
2674 }
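
/* Illustrative sketch (hypothetical caller code): sd_event_prepare(),
 * sd_event_wait() and sd_event_dispatch() are exposed as separate steps so
 * that the loop can be embedded in a foreign poll loop via
 * sd_event_get_fd(). A minimal driver, assuming something else already
 * polled the fd for readability:
 *
 *     #include <systemd/sd-event.h>
 *
 *     int drive_one_iteration(sd_event *e) {
 *             int r;
 *
 *             r = sd_event_prepare(e);       // > 0: something already pending
 *             if (r < 0)
 *                     return r;
 *
 *             if (r == 0) {
 *                     // ... wait until sd_event_get_fd(e) polls readable ...
 *                     r = sd_event_wait(e, 0);   // collect, don't block
 *                     if (r <= 0)
 *                             return r;
 *             }
 *
 *             return sd_event_dispatch(e);   // dispatch one pending source
 *     }
 */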
2675
2676 static void event_log_delays(sd_event *e) {
2677 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
2678 unsigned i;
2679 int o;
2680
2681 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
2682 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
2683 e->delays[i] = 0;
2684 }
2685 log_debug("Event loop iterations: %.*s", o, b);
2686 }
2687
2688 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
2689 int r;
2690
2691 assert_return(e, -EINVAL);
2692 assert_return(e = event_resolve(e), -ENOPKG);
2693 assert_return(!event_pid_changed(e), -ECHILD);
2694 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2695 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2696
2697 if (e->profile_delays && e->last_run) {
2698 usec_t this_run;
2699 unsigned l;
2700
2701 this_run = now(CLOCK_MONOTONIC);
2702
2703 l = u64log2(this_run - e->last_run);
2704 assert(l < sizeof(e->delays));
2705 e->delays[l]++;
2706
2707 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
2708 event_log_delays(e);
2709 e->last_log = this_run;
2710 }
2711 }
2712
2713 r = sd_event_prepare(e);
2714 if (r == 0)
2715 /* There was nothing? Then wait... */
2716 r = sd_event_wait(e, timeout);
2717
2718 if (e->profile_delays)
2719 e->last_run = now(CLOCK_MONOTONIC);
2720
2721 if (r > 0) {
2722 /* There's something now, so let's dispatch it */
2723 r = sd_event_dispatch(e);
2724 if (r < 0)
2725 return r;
2726
2727 return 1;
2728 }
2729
2730 return r;
2731 }
2732
2733 _public_ int sd_event_loop(sd_event *e) {
2734 int r;
2735
2736 assert_return(e, -EINVAL);
2737 assert_return(e = event_resolve(e), -ENOPKG);
2738 assert_return(!event_pid_changed(e), -ECHILD);
2739 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
2740
2741 sd_event_ref(e);
2742
2743 while (e->state != SD_EVENT_FINISHED) {
2744 r = sd_event_run(e, (uint64_t) -1);
2745 if (r < 0)
2746 goto finish;
2747 }
2748
2749 r = e->exit_code;
2750
2751 finish:
2752 sd_event_unref(e);
2753 return r;
2754 }
2755
2756 _public_ int sd_event_get_fd(sd_event *e) {
2757
2758 assert_return(e, -EINVAL);
2759 assert_return(e = event_resolve(e), -ENOPKG);
2760 assert_return(!event_pid_changed(e), -ECHILD);
2761
2762 return e->epoll_fd;
2763 }
2764
2765 _public_ int sd_event_get_state(sd_event *e) {
2766 assert_return(e, -EINVAL);
2767 assert_return(e = event_resolve(e), -ENOPKG);
2768 assert_return(!event_pid_changed(e), -ECHILD);
2769
2770 return e->state;
2771 }
2772
2773 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
2774 assert_return(e, -EINVAL);
2775 assert_return(e = event_resolve(e), -ENOPKG);
2776 assert_return(code, -EINVAL);
2777 assert_return(!event_pid_changed(e), -ECHILD);
2778
2779 if (!e->exit_requested)
2780 return -ENODATA;
2781
2782 *code = e->exit_code;
2783 return 0;
2784 }
2785
2786 _public_ int sd_event_exit(sd_event *e, int code) {
2787 assert_return(e, -EINVAL);
2788 assert_return(e = event_resolve(e), -ENOPKG);
2789 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
2790 assert_return(!event_pid_changed(e), -ECHILD);
2791
2792 e->exit_requested = true;
2793 e->exit_code = code;
2794
2795 return 0;
2796 }
2797
2798 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
2799 assert_return(e, -EINVAL);
2800 assert_return(e = event_resolve(e), -ENOPKG);
2801 assert_return(usec, -EINVAL);
2802 assert_return(!event_pid_changed(e), -ECHILD);
2803
2804 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
2805 return -EOPNOTSUPP;
2806
2807 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
2808 * here, for a reason: there are systems where CLOCK_BOOTTIME is supported but CLOCK_BOOTTIME_ALARM is not,
2809 * and for the purpose of getting the time this doesn't matter. */
2810 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
2811 return -EOPNOTSUPP;
2812
2813 if (!triple_timestamp_is_set(&e->timestamp)) {
2814 /* Implicitly fall back to now() if we never ran
2815 * before and thus have no cached time. */
2816 *usec = now(clock);
2817 return 1;
2818 }
2819
2820 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
2821 return 0;
2822 }
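
/* Illustrative sketch (hypothetical caller code): sd_event_now() returns
 * the timestamp cached when the current iteration began, which is the right
 * base for arming relative timers: every source dispatched in this
 * iteration then agrees on what "now" means.
 *
 *     #include <systemd/sd-event.h>
 *     #include <time.h>
 *
 *     int arm_timer_in_5s(sd_event *e, sd_event_time_handler_t cb) {
 *             uint64_t usec;
 *             int r;
 *
 *             r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *             if (r < 0)
 *                     return r;
 *
 *             return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                      usec + 5 * 1000000ULL, // 5s from now
 *                                      0, cb, NULL);
 *     }
 */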
2823
2824 _public_ int sd_event_default(sd_event **ret) {
2825 sd_event *e = NULL;
2826 int r;
2827
2828 if (!ret)
2829 return !!default_event;
2830
2831 if (default_event) {
2832 *ret = sd_event_ref(default_event);
2833 return 0;
2834 }
2835
2836 r = sd_event_new(&e);
2837 if (r < 0)
2838 return r;
2839
2840 e->default_event_ptr = &default_event;
2841 e->tid = gettid();
2842 default_event = e;
2843
2844 *ret = e;
2845 return 1;
2846 }
2847
2848 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
2849 assert_return(e, -EINVAL);
2850 assert_return(e = event_resolve(e), -ENOPKG);
2851 assert_return(tid, -EINVAL);
2852 assert_return(!event_pid_changed(e), -ECHILD);
2853
2854 if (e->tid != 0) {
2855 *tid = e->tid;
2856 return 0;
2857 }
2858
2859 return -ENXIO;
2860 }
2861
2862 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
2863 int r;
2864
2865 assert_return(e, -EINVAL);
2866 assert_return(e = event_resolve(e), -ENOPKG);
2867 assert_return(!event_pid_changed(e), -ECHILD);
2868
2869 if (e->watchdog == !!b)
2870 return e->watchdog;
2871
2872 if (b) {
2873 struct epoll_event ev = {};
2874
2875 r = sd_watchdog_enabled(false, &e->watchdog_period);
2876 if (r <= 0)
2877 return r;
2878
2879 /* Issue first ping immediately */
2880 sd_notify(false, "WATCHDOG=1");
2881 e->watchdog_last = now(CLOCK_MONOTONIC);
2882
2883 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
2884 if (e->watchdog_fd < 0)
2885 return -errno;
2886
2887 r = arm_watchdog(e);
2888 if (r < 0)
2889 goto fail;
2890
2891 ev.events = EPOLLIN;
2892 ev.data.ptr = INT_TO_PTR(SOURCE_WATCHDOG);
2893
2894 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
2895 if (r < 0) {
2896 r = -errno;
2897 goto fail;
2898 }
2899
2900 } else {
2901 if (e->watchdog_fd >= 0) {
2902 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
2903 e->watchdog_fd = safe_close(e->watchdog_fd);
2904 }
2905 }
2906
2907 e->watchdog = !!b;
2908 return e->watchdog;
2909
2910 fail:
2911 e->watchdog_fd = safe_close(e->watchdog_fd);
2912 return r;
2913 }
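
/* Usage sketch (hypothetical caller code, assuming the service was started
 * with WatchdogSec= so that the manager put WATCHDOG_USEC= into the
 * environment; without it sd_watchdog_enabled() returns 0 and the call
 * below is a no-op):
 *
 *     sd_event *e;
 *     sd_event_default(&e);
 *     sd_event_set_watchdog(e, true);   // > 0 if the watchdog is now active
 *     sd_event_loop(e);                 // the loop sends WATCHDOG=1 pings
 */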
2914
2915 _public_ int sd_event_get_watchdog(sd_event *e) {
2916 assert_return(e, -EINVAL);
2917 assert_return(e = event_resolve(e), -ENOPKG);
2918 assert_return(!event_pid_changed(e), -ECHILD);
2919
2920 return e->watchdog;
2921 }
2922
2923 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
2924 assert_return(e, -EINVAL);
2925 assert_return(e = event_resolve(e), -ENOPKG);
2926 assert_return(!event_pid_changed(e), -ECHILD);
2927
2928 *ret = e->iteration;
2929 return 0;
2930 }