1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 This file is part of systemd.
4
5 Copyright 2013 Lennart Poettering
6 ***/
7
8 #include <sys/epoll.h>
9 #include <sys/timerfd.h>
10 #include <sys/wait.h>
11
12 #include "sd-daemon.h"
13 #include "sd-event.h"
14 #include "sd-id128.h"
15
16 #include "alloc-util.h"
17 #include "fd-util.h"
18 #include "fs-util.h"
19 #include "hashmap.h"
20 #include "list.h"
21 #include "macro.h"
22 #include "missing.h"
23 #include "prioq.h"
24 #include "process-util.h"
25 #include "set.h"
26 #include "signal-util.h"
27 #include "string-table.h"
28 #include "string-util.h"
29 #include "time-util.h"
30 #include "util.h"
31
32 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
33
34 typedef enum EventSourceType {
35 SOURCE_IO,
36 SOURCE_TIME_REALTIME,
37 SOURCE_TIME_BOOTTIME,
38 SOURCE_TIME_MONOTONIC,
39 SOURCE_TIME_REALTIME_ALARM,
40 SOURCE_TIME_BOOTTIME_ALARM,
41 SOURCE_SIGNAL,
42 SOURCE_CHILD,
43 SOURCE_DEFER,
44 SOURCE_POST,
45 SOURCE_EXIT,
46 SOURCE_WATCHDOG,
47 SOURCE_INOTIFY,
48 _SOURCE_EVENT_SOURCE_TYPE_MAX,
49 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
50 } EventSourceType;
51
52 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
53 [SOURCE_IO] = "io",
54 [SOURCE_TIME_REALTIME] = "realtime",
55 [SOURCE_TIME_BOOTTIME] = "boottime",
56 [SOURCE_TIME_MONOTONIC] = "monotonic",
57 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
58 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
59 [SOURCE_SIGNAL] = "signal",
60 [SOURCE_CHILD] = "child",
61 [SOURCE_DEFER] = "defer",
62 [SOURCE_POST] = "post",
63 [SOURCE_EXIT] = "exit",
64 [SOURCE_WATCHDOG] = "watchdog",
65 [SOURCE_INOTIFY] = "inotify",
66 };
67
68 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
69
70 /* All objects we use in epoll events start with this value, so that
71 * we know how to dispatch them */
72 typedef enum WakeupType {
73 WAKEUP_NONE,
74 WAKEUP_EVENT_SOURCE,
75 WAKEUP_CLOCK_DATA,
76 WAKEUP_SIGNAL_DATA,
77 WAKEUP_INOTIFY_DATA,
78 _WAKEUP_TYPE_MAX,
79 _WAKEUP_TYPE_INVALID = -1,
80 } WakeupType;
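/* A sketch of the dispatch pattern this enables: every structure we register with epoll (sd_event_source,
 * clock_data, signal_data, inotify_data) places a WakeupType as its first member and is stored in
 * epoll_event.data.ptr, so the dispatching code further down in this file can classify a wakeup by peeking
 * at that first field, roughly like this:
 *
 *     switch (*(WakeupType*) ev.data.ptr) {
 *     case WAKEUP_EVENT_SOURCE: ...; break;
 *     case WAKEUP_CLOCK_DATA:   ...; break;
 *     case WAKEUP_SIGNAL_DATA:  ...; break;
 *     case WAKEUP_INOTIFY_DATA: ...; break;
 *     default: assert_not_reached("Invalid wakeup type");
 *     }
 */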
81
82 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
83
84 struct inode_data;
85
86 struct sd_event_source {
87 WakeupType wakeup;
88
89 unsigned n_ref;
90
91 sd_event *event;
92 void *userdata;
93 sd_event_handler_t prepare;
94
95 char *description;
96
97 EventSourceType type:5;
98 int enabled:3;
99 bool pending:1;
100 bool dispatching:1;
101 bool floating:1;
102
103 int64_t priority;
104 unsigned pending_index;
105 unsigned prepare_index;
106 uint64_t pending_iteration;
107 uint64_t prepare_iteration;
108
109 sd_event_destroy_t destroy_callback;
110
111 LIST_FIELDS(sd_event_source, sources);
112
113 union {
114 struct {
115 sd_event_io_handler_t callback;
116 int fd;
117 uint32_t events;
118 uint32_t revents;
119 bool registered:1;
120 bool owned:1;
121 } io;
122 struct {
123 sd_event_time_handler_t callback;
124 usec_t next, accuracy;
125 unsigned earliest_index;
126 unsigned latest_index;
127 } time;
128 struct {
129 sd_event_signal_handler_t callback;
130 struct signalfd_siginfo siginfo;
131 int sig;
132 } signal;
133 struct {
134 sd_event_child_handler_t callback;
135 siginfo_t siginfo;
136 pid_t pid;
137 int options;
138 } child;
139 struct {
140 sd_event_handler_t callback;
141 } defer;
142 struct {
143 sd_event_handler_t callback;
144 } post;
145 struct {
146 sd_event_handler_t callback;
147 unsigned prioq_index;
148 } exit;
149 struct {
150 sd_event_inotify_handler_t callback;
151 uint32_t mask;
152 struct inode_data *inode_data;
153 LIST_FIELDS(sd_event_source, by_inode_data);
154 } inotify;
155 };
156 };
157
158 struct clock_data {
159 WakeupType wakeup;
160 int fd;
161
162 /* For all clocks we maintain two priority queues each, one
163 * ordered by the earliest times the events may be
164 * dispatched, and one ordered by the latest times they must
165 * have been dispatched. The range between the top entries in
166 * the two prioqs is the time window we can freely schedule
167 * wakeups in */
168
169 Prioq *earliest;
170 Prioq *latest;
171 usec_t next;
172
173 bool needs_rearm:1;
174 };
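/* A worked example of the scheduling window described above (the figures are illustrative): with two enabled
 * timer sources on the same clock, one with next=1000ms/accuracy=250ms and one with next=1100ms/accuracy=50ms,
 * the top of the "earliest" prioq yields 1000ms and the top of the "latest" prioq yields
 * min(1000+250, 1100+50) = 1150ms. The next wakeup may hence be scheduled anywhere in [1000ms, 1150ms], and if
 * it is placed at or after 1100ms both sources can be served by a single wakeup. */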
175
176 struct signal_data {
177 WakeupType wakeup;
178
179 /* For each priority we maintain one signal fd, so that we
180 * only have to dequeue a single event per priority at a
181 * time. */
182
183 int fd;
184 int64_t priority;
185 sigset_t sigset;
186 sd_event_source *current;
187 };
188
189 /* A structure listing all event sources currently watching a specific inode */
190 struct inode_data {
191 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
192 ino_t ino;
193 dev_t dev;
194
195 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that the priority
196 * can still be changed until then: changing the priority means adding a watch descriptor to the inotify object
197 * of the new priority, which is only possible with a handle to the original inode. We keep a list of all
198 * inode_data objects with an open fd in the to_close list (see below) of the sd-event object, so that it is
199 * efficient to close them all before entering the next event loop
200 * iteration. */
201 int fd;
202
203 /* The inotify "watch descriptor" */
204 int wd;
205
206 /* The combined mask of all inotify watches on this inode that we manage. This is also the mask that has
207 * most recently been set on the watch descriptor. */
208 uint32_t combined_mask;
209
210 /* All event sources subscribed to this inode */
211 LIST_HEAD(sd_event_source, event_sources);
212
213 /* The inotify object we watch this inode with */
214 struct inotify_data *inotify_data;
215
216 /* A linked list of all inode data objects with fds to close (see above) */
217 LIST_FIELDS(struct inode_data, to_close);
218 };
219
220 /* A structure encapsulating an inotify fd */
221 struct inotify_data {
222 WakeupType wakeup;
223
224 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
225 * a time */
226
227 int fd;
228 int64_t priority;
229
230 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
231 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
232
233 /* The buffer we read inotify events into */
234 union inotify_event_buffer buffer;
235 size_t buffer_filled; /* fill level of the buffer */
236
237 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
238 * inotify fd as long as there are still pending events on the inotify (because we have no strategy for queuing
239 * the events locally if they can't be coalesced). */
240 unsigned n_pending;
241
242 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
243 * to make it efficient to figure out what inotify objects to process data on next. */
244 LIST_FIELDS(struct inotify_data, buffered);
245 };
246
247 struct sd_event {
248 unsigned n_ref;
249
250 int epoll_fd;
251 int watchdog_fd;
252
253 Prioq *pending;
254 Prioq *prepare;
255
256 /* timerfd_create() only supports these five clocks so far. We
257 * can add support for more clocks when the kernel learns to
258 * deal with them, too. */
259 struct clock_data realtime;
260 struct clock_data boottime;
261 struct clock_data monotonic;
262 struct clock_data realtime_alarm;
263 struct clock_data boottime_alarm;
264
265 usec_t perturb;
266
267 sd_event_source **signal_sources; /* indexed by signal number */
268 Hashmap *signal_data; /* indexed by priority */
269
270 Hashmap *child_sources;
271 unsigned n_enabled_child_sources;
272
273 Set *post_sources;
274
275 Prioq *exit;
276
277 Hashmap *inotify_data; /* indexed by priority */
278
279 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
280 LIST_HEAD(struct inode_data, inode_data_to_close);
281
282 /* A list of inotify objects that already have events buffered which aren't processed yet */
283 LIST_HEAD(struct inotify_data, inotify_data_buffered);
284
285 pid_t original_pid;
286
287 uint64_t iteration;
288 triple_timestamp timestamp;
289 int state;
290
291 bool exit_requested:1;
292 bool need_process_child:1;
293 bool watchdog:1;
294 bool profile_delays:1;
295
296 int exit_code;
297
298 pid_t tid;
299 sd_event **default_event_ptr;
300
301 usec_t watchdog_last, watchdog_period;
302
303 unsigned n_sources;
304
305 LIST_HEAD(sd_event_source, sources);
306
307 usec_t last_run, last_log;
308 unsigned delays[sizeof(usec_t) * 8];
309 };
310
311 static thread_local sd_event *default_event = NULL;
312
313 static void source_disconnect(sd_event_source *s);
314 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
315
316 static sd_event *event_resolve(sd_event *e) {
317 return e == SD_EVENT_DEFAULT ? default_event : e;
318 }
319
320 static int pending_prioq_compare(const void *a, const void *b) {
321 const sd_event_source *x = a, *y = b;
322
323 assert(x->pending);
324 assert(y->pending);
325
326 /* Enabled ones first */
327 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
328 return -1;
329 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
330 return 1;
331
332 /* Lower priority values first */
333 if (x->priority < y->priority)
334 return -1;
335 if (x->priority > y->priority)
336 return 1;
337
338 /* Older entries first */
339 if (x->pending_iteration < y->pending_iteration)
340 return -1;
341 if (x->pending_iteration > y->pending_iteration)
342 return 1;
343
344 return 0;
345 }
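/* For example (using the constants from sd-event.h): with two pending sources, one at
 * SD_EVENT_PRIORITY_IMPORTANT (-100) and one at SD_EVENT_PRIORITY_NORMAL (0), the -100 source is dispatched
 * first. Among sources of equal priority the one that became pending in an earlier iteration sorts first,
 * so sources of the same priority cannot starve each other. */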
346
347 static int prepare_prioq_compare(const void *a, const void *b) {
348 const sd_event_source *x = a, *y = b;
349
350 assert(x->prepare);
351 assert(y->prepare);
352
353 /* Enabled ones first */
354 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
355 return -1;
356 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
357 return 1;
358
359 /* Move most recently prepared ones last, so that we can stop
360 * preparing as soon as we hit one that has already been
361 * prepared in the current iteration */
362 if (x->prepare_iteration < y->prepare_iteration)
363 return -1;
364 if (x->prepare_iteration > y->prepare_iteration)
365 return 1;
366
367 /* Lower priority values first */
368 if (x->priority < y->priority)
369 return -1;
370 if (x->priority > y->priority)
371 return 1;
372
373 return 0;
374 }
375
376 static int earliest_time_prioq_compare(const void *a, const void *b) {
377 const sd_event_source *x = a, *y = b;
378
379 assert(EVENT_SOURCE_IS_TIME(x->type));
380 assert(x->type == y->type);
381
382 /* Enabled ones first */
383 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
384 return -1;
385 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
386 return 1;
387
388 /* Move the pending ones to the end */
389 if (!x->pending && y->pending)
390 return -1;
391 if (x->pending && !y->pending)
392 return 1;
393
394 /* Order by time */
395 if (x->time.next < y->time.next)
396 return -1;
397 if (x->time.next > y->time.next)
398 return 1;
399
400 return 0;
401 }
402
403 static usec_t time_event_source_latest(const sd_event_source *s) {
404 return usec_add(s->time.next, s->time.accuracy);
405 }
406
407 static int latest_time_prioq_compare(const void *a, const void *b) {
408 const sd_event_source *x = a, *y = b;
409
410 assert(EVENT_SOURCE_IS_TIME(x->type));
411 assert(x->type == y->type);
412
413 /* Enabled ones first */
414 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
415 return -1;
416 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
417 return 1;
418
419 /* Move the pending ones to the end */
420 if (!x->pending && y->pending)
421 return -1;
422 if (x->pending && !y->pending)
423 return 1;
424
425 /* Order by time */
426 if (time_event_source_latest(x) < time_event_source_latest(y))
427 return -1;
428 if (time_event_source_latest(x) > time_event_source_latest(y))
429 return 1;
430
431 return 0;
432 }
433
434 static int exit_prioq_compare(const void *a, const void *b) {
435 const sd_event_source *x = a, *y = b;
436
437 assert(x->type == SOURCE_EXIT);
438 assert(y->type == SOURCE_EXIT);
439
440 /* Enabled ones first */
441 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
442 return -1;
443 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
444 return 1;
445
446 /* Lower priority values first */
447 if (x->priority < y->priority)
448 return -1;
449 if (x->priority > y->priority)
450 return 1;
451
452 return 0;
453 }
454
455 static void free_clock_data(struct clock_data *d) {
456 assert(d);
457 assert(d->wakeup == WAKEUP_CLOCK_DATA);
458
459 safe_close(d->fd);
460 prioq_free(d->earliest);
461 prioq_free(d->latest);
462 }
463
464 static void event_free(sd_event *e) {
465 sd_event_source *s;
466
467 assert(e);
468
469 while ((s = e->sources)) {
470 assert(s->floating);
471 source_disconnect(s);
472 sd_event_source_unref(s);
473 }
474
475 assert(e->n_sources == 0);
476
477 if (e->default_event_ptr)
478 *(e->default_event_ptr) = NULL;
479
480 safe_close(e->epoll_fd);
481 safe_close(e->watchdog_fd);
482
483 free_clock_data(&e->realtime);
484 free_clock_data(&e->boottime);
485 free_clock_data(&e->monotonic);
486 free_clock_data(&e->realtime_alarm);
487 free_clock_data(&e->boottime_alarm);
488
489 prioq_free(e->pending);
490 prioq_free(e->prepare);
491 prioq_free(e->exit);
492
493 free(e->signal_sources);
494 hashmap_free(e->signal_data);
495
496 hashmap_free(e->inotify_data);
497
498 hashmap_free(e->child_sources);
499 set_free(e->post_sources);
500 free(e);
501 }
502
503 _public_ int sd_event_new(sd_event** ret) {
504 sd_event *e;
505 int r;
506
507 assert_return(ret, -EINVAL);
508
509 e = new(sd_event, 1);
510 if (!e)
511 return -ENOMEM;
512
513 *e = (sd_event) {
514 .n_ref = 1,
515 .epoll_fd = -1,
516 .watchdog_fd = -1,
517 .realtime.wakeup = WAKEUP_CLOCK_DATA,
518 .realtime.fd = -1,
519 .realtime.next = USEC_INFINITY,
520 .boottime.wakeup = WAKEUP_CLOCK_DATA,
521 .boottime.fd = -1,
522 .boottime.next = USEC_INFINITY,
523 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
524 .monotonic.fd = -1,
525 .monotonic.next = USEC_INFINITY,
526 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
527 .realtime_alarm.fd = -1,
528 .realtime_alarm.next = USEC_INFINITY,
529 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
530 .boottime_alarm.fd = -1,
531 .boottime_alarm.next = USEC_INFINITY,
532 .perturb = USEC_INFINITY,
533 .original_pid = getpid_cached(),
534 };
535
536 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
537 if (r < 0)
538 goto fail;
539
540 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
541 if (e->epoll_fd < 0) {
542 r = -errno;
543 goto fail;
544 }
545
546 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
547
548 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
549 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
550 e->profile_delays = true;
551 }
552
553 *ret = e;
554 return 0;
555
556 fail:
557 event_free(e);
558 return r;
559 }
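/* A minimal usage sketch for the allocation API above, assuming the declarations from sd-event.h (error
 * handling abbreviated, variable names illustrative):
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);          (or sd_event_default(&e) for the per-thread default instance)
 *     if (r < 0)
 *             return r;
 *     ... add event sources ...
 *     r = sd_event_loop(e);          (runs until sd_event_exit() is called)
 *     sd_event_unref(e);
 */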
560
561 _public_ sd_event* sd_event_ref(sd_event *e) {
562
563 if (!e)
564 return NULL;
565
566 assert(e->n_ref >= 1);
567 e->n_ref++;
568
569 return e;
570 }
571
572 _public_ sd_event* sd_event_unref(sd_event *e) {
573
574 if (!e)
575 return NULL;
576
577 assert(e->n_ref >= 1);
578 e->n_ref--;
579
580 if (e->n_ref <= 0)
581 event_free(e);
582
583 return NULL;
584 }
585
586 static bool event_pid_changed(sd_event *e) {
587 assert(e);
588
589 /* We don't support people creating an event loop and keeping
590 * it around over a fork(). Let's complain. */
591
592 return e->original_pid != getpid_cached();
593 }
594
595 static void source_io_unregister(sd_event_source *s) {
596 int r;
597
598 assert(s);
599 assert(s->type == SOURCE_IO);
600
601 if (event_pid_changed(s->event))
602 return;
603
604 if (!s->io.registered)
605 return;
606
607 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
608 if (r < 0)
609 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
610 strna(s->description), event_source_type_to_string(s->type));
611
612 s->io.registered = false;
613 }
614
615 static int source_io_register(
616 sd_event_source *s,
617 int enabled,
618 uint32_t events) {
619
620 struct epoll_event ev;
621 int r;
622
623 assert(s);
624 assert(s->type == SOURCE_IO);
625 assert(enabled != SD_EVENT_OFF);
626
627 ev = (struct epoll_event) {
628 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
629 .data.ptr = s,
630 };
631
632 if (s->io.registered)
633 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
634 else
635 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
636 if (r < 0)
637 return -errno;
638
639 s->io.registered = true;
640
641 return 0;
642 }
643
644 static clockid_t event_source_type_to_clock(EventSourceType t) {
645
646 switch (t) {
647
648 case SOURCE_TIME_REALTIME:
649 return CLOCK_REALTIME;
650
651 case SOURCE_TIME_BOOTTIME:
652 return CLOCK_BOOTTIME;
653
654 case SOURCE_TIME_MONOTONIC:
655 return CLOCK_MONOTONIC;
656
657 case SOURCE_TIME_REALTIME_ALARM:
658 return CLOCK_REALTIME_ALARM;
659
660 case SOURCE_TIME_BOOTTIME_ALARM:
661 return CLOCK_BOOTTIME_ALARM;
662
663 default:
664 return (clockid_t) -1;
665 }
666 }
667
668 static EventSourceType clock_to_event_source_type(clockid_t clock) {
669
670 switch (clock) {
671
672 case CLOCK_REALTIME:
673 return SOURCE_TIME_REALTIME;
674
675 case CLOCK_BOOTTIME:
676 return SOURCE_TIME_BOOTTIME;
677
678 case CLOCK_MONOTONIC:
679 return SOURCE_TIME_MONOTONIC;
680
681 case CLOCK_REALTIME_ALARM:
682 return SOURCE_TIME_REALTIME_ALARM;
683
684 case CLOCK_BOOTTIME_ALARM:
685 return SOURCE_TIME_BOOTTIME_ALARM;
686
687 default:
688 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
689 }
690 }
691
692 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
693 assert(e);
694
695 switch (t) {
696
697 case SOURCE_TIME_REALTIME:
698 return &e->realtime;
699
700 case SOURCE_TIME_BOOTTIME:
701 return &e->boottime;
702
703 case SOURCE_TIME_MONOTONIC:
704 return &e->monotonic;
705
706 case SOURCE_TIME_REALTIME_ALARM:
707 return &e->realtime_alarm;
708
709 case SOURCE_TIME_BOOTTIME_ALARM:
710 return &e->boottime_alarm;
711
712 default:
713 return NULL;
714 }
715 }
716
717 static int event_make_signal_data(
718 sd_event *e,
719 int sig,
720 struct signal_data **ret) {
721
722 struct epoll_event ev;
723 struct signal_data *d;
724 bool added = false;
725 sigset_t ss_copy;
726 int64_t priority;
727 int r;
728
729 assert(e);
730
731 if (event_pid_changed(e))
732 return -ECHILD;
733
734 if (e->signal_sources && e->signal_sources[sig])
735 priority = e->signal_sources[sig]->priority;
736 else
737 priority = SD_EVENT_PRIORITY_NORMAL;
738
739 d = hashmap_get(e->signal_data, &priority);
740 if (d) {
741 if (sigismember(&d->sigset, sig) > 0) {
742 if (ret)
743 *ret = d;
744 return 0;
745 }
746 } else {
747 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
748 if (r < 0)
749 return r;
750
751 d = new(struct signal_data, 1);
752 if (!d)
753 return -ENOMEM;
754
755 *d = (struct signal_data) {
756 .wakeup = WAKEUP_SIGNAL_DATA,
757 .fd = -1,
758 .priority = priority,
759 };
760
761 r = hashmap_put(e->signal_data, &d->priority, d);
762 if (r < 0) {
763 free(d);
764 return r;
765 }
766
767 added = true;
768 }
769
770 ss_copy = d->sigset;
771 assert_se(sigaddset(&ss_copy, sig) >= 0);
772
773 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
774 if (r < 0) {
775 r = -errno;
776 goto fail;
777 }
778
779 d->sigset = ss_copy;
780
781 if (d->fd >= 0) {
782 if (ret)
783 *ret = d;
784 return 0;
785 }
786
787 d->fd = fd_move_above_stdio(r);
788
789 ev = (struct epoll_event) {
790 .events = EPOLLIN,
791 .data.ptr = d,
792 };
793
794 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
795 if (r < 0) {
796 r = -errno;
797 goto fail;
798 }
799
800 if (ret)
801 *ret = d;
802
803 return 0;
804
805 fail:
806 if (added) {
807 d->fd = safe_close(d->fd);
808 hashmap_remove(e->signal_data, &d->priority);
809 free(d);
810 }
811
812 return r;
813 }
814
815 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
816 assert(e);
817 assert(d);
818
819 /* Turns off the specified signal in the signal data
820 * object. If the signal mask of the object becomes empty that
821 * way, the object is removed. */
822
823 if (sigismember(&d->sigset, sig) == 0)
824 return;
825
826 assert_se(sigdelset(&d->sigset, sig) >= 0);
827
828 if (sigisemptyset(&d->sigset)) {
829
830 /* If the mask is all-zero we can get rid of the structure */
831 hashmap_remove(e->signal_data, &d->priority);
832 safe_close(d->fd);
833 free(d);
834 return;
835 }
836
837 assert(d->fd >= 0);
838
839 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
840 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
841 }
842
843 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
844 struct signal_data *d;
845 static const int64_t zero_priority = 0;
846
847 assert(e);
848
849 /* Rechecks if the specified signal is still something we are
850 * interested in. If not, we'll unmask it, and possibly drop
851 * the signalfd for it. */
852
853 if (sig == SIGCHLD &&
854 e->n_enabled_child_sources > 0)
855 return;
856
857 if (e->signal_sources &&
858 e->signal_sources[sig] &&
859 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
860 return;
861
862 /*
863 * The specified signal might be enabled in three different queues:
864 *
865 * 1) the one that belongs to the priority passed (if it is non-NULL)
866 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
867 * 3) the 0 priority (to cover the SIGCHLD case)
868 *
869 * Hence, let's remove it from all three here.
870 */
871
872 if (priority) {
873 d = hashmap_get(e->signal_data, priority);
874 if (d)
875 event_unmask_signal_data(e, d, sig);
876 }
877
878 if (e->signal_sources && e->signal_sources[sig]) {
879 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
880 if (d)
881 event_unmask_signal_data(e, d, sig);
882 }
883
884 d = hashmap_get(e->signal_data, &zero_priority);
885 if (d)
886 event_unmask_signal_data(e, d, sig);
887 }
888
889 static void source_disconnect(sd_event_source *s) {
890 sd_event *event;
891
892 assert(s);
893
894 if (!s->event)
895 return;
896
897 assert(s->event->n_sources > 0);
898
899 switch (s->type) {
900
901 case SOURCE_IO:
902 if (s->io.fd >= 0)
903 source_io_unregister(s);
904
905 break;
906
907 case SOURCE_TIME_REALTIME:
908 case SOURCE_TIME_BOOTTIME:
909 case SOURCE_TIME_MONOTONIC:
910 case SOURCE_TIME_REALTIME_ALARM:
911 case SOURCE_TIME_BOOTTIME_ALARM: {
912 struct clock_data *d;
913
914 d = event_get_clock_data(s->event, s->type);
915 assert(d);
916
917 prioq_remove(d->earliest, s, &s->time.earliest_index);
918 prioq_remove(d->latest, s, &s->time.latest_index);
919 d->needs_rearm = true;
920 break;
921 }
922
923 case SOURCE_SIGNAL:
924 if (s->signal.sig > 0) {
925
926 if (s->event->signal_sources)
927 s->event->signal_sources[s->signal.sig] = NULL;
928
929 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
930 }
931
932 break;
933
934 case SOURCE_CHILD:
935 if (s->child.pid > 0) {
936 if (s->enabled != SD_EVENT_OFF) {
937 assert(s->event->n_enabled_child_sources > 0);
938 s->event->n_enabled_child_sources--;
939 }
940
941 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
942 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
943 }
944
945 break;
946
947 case SOURCE_DEFER:
948 /* nothing */
949 break;
950
951 case SOURCE_POST:
952 set_remove(s->event->post_sources, s);
953 break;
954
955 case SOURCE_EXIT:
956 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
957 break;
958
959 case SOURCE_INOTIFY: {
960 struct inode_data *inode_data;
961
962 inode_data = s->inotify.inode_data;
963 if (inode_data) {
964 struct inotify_data *inotify_data;
965 assert_se(inotify_data = inode_data->inotify_data);
966
967 /* Detach this event source from the inode object */
968 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
969 s->inotify.inode_data = NULL;
970
971 if (s->pending) {
972 assert(inotify_data->n_pending > 0);
973 inotify_data->n_pending--;
974 }
975
976 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
977 * continues to be watched. That's because inotify doesn't really have an API for that: we
978 * can only change watch masks with access to the original inode either by fd or by path. But
979 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
980 * continuously and keeping the mount busy, which we can't really do. We could reconstruct the
981 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
982 * there), but given the need for open_by_handle_at() which is privileged and not universally
983 * available this would be quite an incomplete solution. Hence we go the other way, leave the
984 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
985 * anymore after reception. Yes, this sucks, but … Linux … */
986
987 /* Maybe release the inode data (and its inotify) */
988 event_gc_inode_data(s->event, inode_data);
989 }
990
991 break;
992 }
993
994 default:
995 assert_not_reached("Wut? I shouldn't exist.");
996 }
997
998 if (s->pending)
999 prioq_remove(s->event->pending, s, &s->pending_index);
1000
1001 if (s->prepare)
1002 prioq_remove(s->event->prepare, s, &s->prepare_index);
1003
1004 event = s->event;
1005
1006 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
1007 s->event = NULL;
1008 LIST_REMOVE(sources, event->sources, s);
1009 event->n_sources--;
1010
1011 if (!s->floating)
1012 sd_event_unref(event);
1013 }
1014
1015 static void source_free(sd_event_source *s) {
1016 assert(s);
1017
1018 source_disconnect(s);
1019
1020 if (s->type == SOURCE_IO && s->io.owned)
1021 s->io.fd = safe_close(s->io.fd);
1022
1023 if (s->destroy_callback)
1024 s->destroy_callback(s->userdata);
1025
1026 free(s->description);
1027 free(s);
1028 }
1029
1030 static int source_set_pending(sd_event_source *s, bool b) {
1031 int r;
1032
1033 assert(s);
1034 assert(s->type != SOURCE_EXIT);
1035
1036 if (s->pending == b)
1037 return 0;
1038
1039 s->pending = b;
1040
1041 if (b) {
1042 s->pending_iteration = s->event->iteration;
1043
1044 r = prioq_put(s->event->pending, s, &s->pending_index);
1045 if (r < 0) {
1046 s->pending = false;
1047 return r;
1048 }
1049 } else
1050 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1051
1052 if (EVENT_SOURCE_IS_TIME(s->type)) {
1053 struct clock_data *d;
1054
1055 d = event_get_clock_data(s->event, s->type);
1056 assert(d);
1057
1058 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1059 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1060 d->needs_rearm = true;
1061 }
1062
1063 if (s->type == SOURCE_SIGNAL && !b) {
1064 struct signal_data *d;
1065
1066 d = hashmap_get(s->event->signal_data, &s->priority);
1067 if (d && d->current == s)
1068 d->current = NULL;
1069 }
1070
1071 if (s->type == SOURCE_INOTIFY) {
1072
1073 assert(s->inotify.inode_data);
1074 assert(s->inotify.inode_data->inotify_data);
1075
1076 if (b)
1077 s->inotify.inode_data->inotify_data->n_pending++;
1078 else {
1079 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1080 s->inotify.inode_data->inotify_data->n_pending--;
1081 }
1082 }
1083
1084 return 0;
1085 }
1086
1087 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1088 sd_event_source *s;
1089
1090 assert(e);
1091
1092 s = new(sd_event_source, 1);
1093 if (!s)
1094 return NULL;
1095
1096 *s = (struct sd_event_source) {
1097 .n_ref = 1,
1098 .event = e,
1099 .floating = floating,
1100 .type = type,
1101 .pending_index = PRIOQ_IDX_NULL,
1102 .prepare_index = PRIOQ_IDX_NULL,
1103 };
1104
1105 if (!floating)
1106 sd_event_ref(e);
1107
1108 LIST_PREPEND(sources, e->sources, s);
1109 e->n_sources++;
1110
1111 return s;
1112 }
1113
1114 _public_ int sd_event_add_io(
1115 sd_event *e,
1116 sd_event_source **ret,
1117 int fd,
1118 uint32_t events,
1119 sd_event_io_handler_t callback,
1120 void *userdata) {
1121
1122 sd_event_source *s;
1123 int r;
1124
1125 assert_return(e, -EINVAL);
1126 assert_return(e = event_resolve(e), -ENOPKG);
1127 assert_return(fd >= 0, -EBADF);
1128 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1129 assert_return(callback, -EINVAL);
1130 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1131 assert_return(!event_pid_changed(e), -ECHILD);
1132
1133 s = source_new(e, !ret, SOURCE_IO);
1134 if (!s)
1135 return -ENOMEM;
1136
1137 s->wakeup = WAKEUP_EVENT_SOURCE;
1138 s->io.fd = fd;
1139 s->io.events = events;
1140 s->io.callback = callback;
1141 s->userdata = userdata;
1142 s->enabled = SD_EVENT_ON;
1143
1144 r = source_io_register(s, s->enabled, events);
1145 if (r < 0) {
1146 source_free(s);
1147 return r;
1148 }
1149
1150 if (ret)
1151 *ret = s;
1152
1153 return 0;
1154 }
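/* A usage sketch for sd_event_add_io(); the handler signature follows the sd_event_io_handler_t typedef from
 * sd-event.h, and "my_fd"/"on_io" are illustrative names:
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     ... read from fd until EAGAIN ...
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, my_fd, EPOLLIN, on_io, userdata);
 */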
1155
1156 static void initialize_perturb(sd_event *e) {
1157 sd_id128_t bootid = {};
1158
1159 /* When we sleep for longer, we try to realign the wakeup to
1160 the same time within each minute/second/250ms, so that
1161 events all across the system can be coalesced into a single
1162 CPU wakeup. However, let's take some system-specific
1163 randomness for this value, so that in a network of systems
1164 with synced clocks timer events are distributed a
1165 bit. Here, we calculate a perturbation usec offset from the
1166 boot ID. */
1167
1168 if (_likely_(e->perturb != USEC_INFINITY))
1169 return;
1170
1171 if (sd_id128_get_boot(&bootid) >= 0)
1172 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1173 }
1174
1175 static int event_setup_timer_fd(
1176 sd_event *e,
1177 struct clock_data *d,
1178 clockid_t clock) {
1179
1180 struct epoll_event ev;
1181 int r, fd;
1182
1183 assert(e);
1184 assert(d);
1185
1186 if (_likely_(d->fd >= 0))
1187 return 0;
1188
1189 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1190 if (fd < 0)
1191 return -errno;
1192
1193 fd = fd_move_above_stdio(fd);
1194
1195 ev = (struct epoll_event) {
1196 .events = EPOLLIN,
1197 .data.ptr = d,
1198 };
1199
1200 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1201 if (r < 0) {
1202 safe_close(fd);
1203 return -errno;
1204 }
1205
1206 d->fd = fd;
1207 return 0;
1208 }
1209
1210 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1211 assert(s);
1212
1213 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1214 }
1215
1216 _public_ int sd_event_add_time(
1217 sd_event *e,
1218 sd_event_source **ret,
1219 clockid_t clock,
1220 uint64_t usec,
1221 uint64_t accuracy,
1222 sd_event_time_handler_t callback,
1223 void *userdata) {
1224
1225 EventSourceType type;
1226 sd_event_source *s;
1227 struct clock_data *d;
1228 int r;
1229
1230 assert_return(e, -EINVAL);
1231 assert_return(e = event_resolve(e), -ENOPKG);
1232 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1233 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1234 assert_return(!event_pid_changed(e), -ECHILD);
1235
1236 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1237 return -EOPNOTSUPP;
1238
1239 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1240 if (type < 0)
1241 return -EOPNOTSUPP;
1242
1243 if (!callback)
1244 callback = time_exit_callback;
1245
1246 d = event_get_clock_data(e, type);
1247 assert(d);
1248
1249 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1250 if (r < 0)
1251 return r;
1252
1253 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1254 if (r < 0)
1255 return r;
1256
1257 if (d->fd < 0) {
1258 r = event_setup_timer_fd(e, d, clock);
1259 if (r < 0)
1260 return r;
1261 }
1262
1263 s = source_new(e, !ret, type);
1264 if (!s)
1265 return -ENOMEM;
1266
1267 s->time.next = usec;
1268 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1269 s->time.callback = callback;
1270 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1271 s->userdata = userdata;
1272 s->enabled = SD_EVENT_ONESHOT;
1273
1274 d->needs_rearm = true;
1275
1276 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1277 if (r < 0)
1278 goto fail;
1279
1280 r = prioq_put(d->latest, s, &s->time.latest_index);
1281 if (r < 0)
1282 goto fail;
1283
1284 if (ret)
1285 *ret = s;
1286
1287 return 0;
1288
1289 fail:
1290 source_free(s);
1291 return r;
1292 }
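/* A usage sketch for sd_event_add_time(), assuming the sd_event_time_handler_t typedef from sd-event.h; the
 * 5s/100ms figures are illustrative. Note that passing accuracy==0 selects DEFAULT_ACCURACY_USEC (250ms), as
 * implemented above:
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             ...
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           now_usec + 5 * USEC_PER_SEC, 100 * USEC_PER_MSEC,
 *                           on_timer, userdata);
 */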
1293
1294 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1295 assert(s);
1296
1297 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1298 }
1299
1300 _public_ int sd_event_add_signal(
1301 sd_event *e,
1302 sd_event_source **ret,
1303 int sig,
1304 sd_event_signal_handler_t callback,
1305 void *userdata) {
1306
1307 sd_event_source *s;
1308 struct signal_data *d;
1309 sigset_t ss;
1310 int r;
1311
1312 assert_return(e, -EINVAL);
1313 assert_return(e = event_resolve(e), -ENOPKG);
1314 assert_return(SIGNAL_VALID(sig), -EINVAL);
1315 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1316 assert_return(!event_pid_changed(e), -ECHILD);
1317
1318 if (!callback)
1319 callback = signal_exit_callback;
1320
1321 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1322 if (r != 0)
1323 return -r;
1324
1325 if (!sigismember(&ss, sig))
1326 return -EBUSY;
1327
1328 if (!e->signal_sources) {
1329 e->signal_sources = new0(sd_event_source*, _NSIG);
1330 if (!e->signal_sources)
1331 return -ENOMEM;
1332 } else if (e->signal_sources[sig])
1333 return -EBUSY;
1334
1335 s = source_new(e, !ret, SOURCE_SIGNAL);
1336 if (!s)
1337 return -ENOMEM;
1338
1339 s->signal.sig = sig;
1340 s->signal.callback = callback;
1341 s->userdata = userdata;
1342 s->enabled = SD_EVENT_ON;
1343
1344 e->signal_sources[sig] = s;
1345
1346 r = event_make_signal_data(e, sig, &d);
1347 if (r < 0) {
1348 source_free(s);
1349 return r;
1350 }
1351
1352 /* Use the signal name as description for the event source by default */
1353 (void) sd_event_source_set_description(s, signal_to_string(sig));
1354
1355 if (ret)
1356 *ret = s;
1357
1358 return 0;
1359 }
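/* A usage sketch for sd_event_add_signal(). As checked above, the signal must already be blocked in the
 * calling thread's signal mask, otherwise -EBUSY is returned; a NULL callback falls back to
 * signal_exit_callback(), i.e. terminates the event loop:
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGTERM) >= 0);
 *     assert_se(pthread_sigmask(SIG_BLOCK, &ss, NULL) == 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */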
1360
1361 _public_ int sd_event_add_child(
1362 sd_event *e,
1363 sd_event_source **ret,
1364 pid_t pid,
1365 int options,
1366 sd_event_child_handler_t callback,
1367 void *userdata) {
1368
1369 sd_event_source *s;
1370 int r;
1371
1372 assert_return(e, -EINVAL);
1373 assert_return(e = event_resolve(e), -ENOPKG);
1374 assert_return(pid > 1, -EINVAL);
1375 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1376 assert_return(options != 0, -EINVAL);
1377 assert_return(callback, -EINVAL);
1378 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1379 assert_return(!event_pid_changed(e), -ECHILD);
1380
1381 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1382 if (r < 0)
1383 return r;
1384
1385 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1386 return -EBUSY;
1387
1388 s = source_new(e, !ret, SOURCE_CHILD);
1389 if (!s)
1390 return -ENOMEM;
1391
1392 s->child.pid = pid;
1393 s->child.options = options;
1394 s->child.callback = callback;
1395 s->userdata = userdata;
1396 s->enabled = SD_EVENT_ONESHOT;
1397
1398 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1399 if (r < 0) {
1400 source_free(s);
1401 return r;
1402 }
1403
1404 e->n_enabled_child_sources++;
1405
1406 r = event_make_signal_data(e, SIGCHLD, NULL);
1407 if (r < 0) {
1408 e->n_enabled_child_sources--;
1409 source_free(s);
1410 return r;
1411 }
1412
1413 e->need_process_child = true;
1414
1415 if (ret)
1416 *ret = s;
1417
1418 return 0;
1419 }
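/* A usage sketch for sd_event_add_child(); the handler signature follows sd_event_child_handler_t from
 * sd-event.h. Like signal sources, this relies on the signalfd machinery for SIGCHLD (see
 * event_make_signal_data() above), so SIGCHLD is expected to be blocked in all threads of the process:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             ... si->si_pid, si->si_code and si->si_status describe the state change ...
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, &source, child_pid, WEXITED, on_child, userdata);
 */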
1420
1421 _public_ int sd_event_add_defer(
1422 sd_event *e,
1423 sd_event_source **ret,
1424 sd_event_handler_t callback,
1425 void *userdata) {
1426
1427 sd_event_source *s;
1428 int r;
1429
1430 assert_return(e, -EINVAL);
1431 assert_return(e = event_resolve(e), -ENOPKG);
1432 assert_return(callback, -EINVAL);
1433 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1434 assert_return(!event_pid_changed(e), -ECHILD);
1435
1436 s = source_new(e, !ret, SOURCE_DEFER);
1437 if (!s)
1438 return -ENOMEM;
1439
1440 s->defer.callback = callback;
1441 s->userdata = userdata;
1442 s->enabled = SD_EVENT_ONESHOT;
1443
1444 r = source_set_pending(s, true);
1445 if (r < 0) {
1446 source_free(s);
1447 return r;
1448 }
1449
1450 if (ret)
1451 *ret = s;
1452
1453 return 0;
1454 }
1455
1456 _public_ int sd_event_add_post(
1457 sd_event *e,
1458 sd_event_source **ret,
1459 sd_event_handler_t callback,
1460 void *userdata) {
1461
1462 sd_event_source *s;
1463 int r;
1464
1465 assert_return(e, -EINVAL);
1466 assert_return(e = event_resolve(e), -ENOPKG);
1467 assert_return(callback, -EINVAL);
1468 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1469 assert_return(!event_pid_changed(e), -ECHILD);
1470
1471 r = set_ensure_allocated(&e->post_sources, NULL);
1472 if (r < 0)
1473 return r;
1474
1475 s = source_new(e, !ret, SOURCE_POST);
1476 if (!s)
1477 return -ENOMEM;
1478
1479 s->post.callback = callback;
1480 s->userdata = userdata;
1481 s->enabled = SD_EVENT_ON;
1482
1483 r = set_put(e->post_sources, s);
1484 if (r < 0) {
1485 source_free(s);
1486 return r;
1487 }
1488
1489 if (ret)
1490 *ret = s;
1491
1492 return 0;
1493 }
1494
1495 _public_ int sd_event_add_exit(
1496 sd_event *e,
1497 sd_event_source **ret,
1498 sd_event_handler_t callback,
1499 void *userdata) {
1500
1501 sd_event_source *s;
1502 int r;
1503
1504 assert_return(e, -EINVAL);
1505 assert_return(e = event_resolve(e), -ENOPKG);
1506 assert_return(callback, -EINVAL);
1507 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1508 assert_return(!event_pid_changed(e), -ECHILD);
1509
1510 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1511 if (r < 0)
1512 return r;
1513
1514 s = source_new(e, !ret, SOURCE_EXIT);
1515 if (!s)
1516 return -ENOMEM;
1517
1518 s->exit.callback = callback;
1519 s->userdata = userdata;
1520 s->exit.prioq_index = PRIOQ_IDX_NULL;
1521 s->enabled = SD_EVENT_ONESHOT;
1522
1523 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1524 if (r < 0) {
1525 source_free(s);
1526 return r;
1527 }
1528
1529 if (ret)
1530 *ret = s;
1531
1532 return 0;
1533 }
1534
1535 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1536 assert(e);
1537
1538 if (!d)
1539 return;
1540
1541 assert(hashmap_isempty(d->inodes));
1542 assert(hashmap_isempty(d->wd));
1543
1544 if (d->buffer_filled > 0)
1545 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1546
1547 hashmap_free(d->inodes);
1548 hashmap_free(d->wd);
1549
1550 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1551
1552 if (d->fd >= 0) {
1553 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1554 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1555
1556 safe_close(d->fd);
1557 }
1558 free(d);
1559 }
1560
1561 static int event_make_inotify_data(
1562 sd_event *e,
1563 int64_t priority,
1564 struct inotify_data **ret) {
1565
1566 _cleanup_close_ int fd = -1;
1567 struct inotify_data *d;
1568 struct epoll_event ev;
1569 int r;
1570
1571 assert(e);
1572
1573 d = hashmap_get(e->inotify_data, &priority);
1574 if (d) {
1575 if (ret)
1576 *ret = d;
1577 return 0;
1578 }
1579
1580 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1581 if (fd < 0)
1582 return -errno;
1583
1584 fd = fd_move_above_stdio(fd);
1585
1586 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1587 if (r < 0)
1588 return r;
1589
1590 d = new(struct inotify_data, 1);
1591 if (!d)
1592 return -ENOMEM;
1593
1594 *d = (struct inotify_data) {
1595 .wakeup = WAKEUP_INOTIFY_DATA,
1596 .fd = TAKE_FD(fd),
1597 .priority = priority,
1598 };
1599
1600 r = hashmap_put(e->inotify_data, &d->priority, d);
1601 if (r < 0) {
1602 d->fd = safe_close(d->fd);
1603 free(d);
1604 return r;
1605 }
1606
1607 ev = (struct epoll_event) {
1608 .events = EPOLLIN,
1609 .data.ptr = d,
1610 };
1611
1612 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1613 r = -errno;
1614 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1615 * remove the fd from the epoll first, which we don't want as we couldn't
1616 * add it in the first place. */
1617 event_free_inotify_data(e, d);
1618 return r;
1619 }
1620
1621 if (ret)
1622 *ret = d;
1623
1624 return 1;
1625 }
1626
1627 static int inode_data_compare(const void *a, const void *b) {
1628 const struct inode_data *x = a, *y = b;
1629
1630 assert(x);
1631 assert(y);
1632
1633 if (x->dev < y->dev)
1634 return -1;
1635 if (x->dev > y->dev)
1636 return 1;
1637
1638 if (x->ino < y->ino)
1639 return -1;
1640 if (x->ino > y->ino)
1641 return 1;
1642
1643 return 0;
1644 }
1645
1646 static void inode_data_hash_func(const void *p, struct siphash *state) {
1647 const struct inode_data *d = p;
1648
1649 assert(p);
1650
1651 siphash24_compress(&d->dev, sizeof(d->dev), state);
1652 siphash24_compress(&d->ino, sizeof(d->ino), state);
1653 }
1654
1655 const struct hash_ops inode_data_hash_ops = {
1656 .hash = inode_data_hash_func,
1657 .compare = inode_data_compare
1658 };
1659
1660 static void event_free_inode_data(
1661 sd_event *e,
1662 struct inode_data *d) {
1663
1664 assert(e);
1665
1666 if (!d)
1667 return;
1668
1669 assert(!d->event_sources);
1670
1671 if (d->fd >= 0) {
1672 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1673 safe_close(d->fd);
1674 }
1675
1676 if (d->inotify_data) {
1677
1678 if (d->wd >= 0) {
1679 if (d->inotify_data->fd >= 0) {
1680 /* So here's a problem. At the time this runs the watch descriptor might already be
1681 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1682 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's quite
1683 * likely to happen. */
1684
1685 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1686 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1687 }
1688
1689 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1690 }
1691
1692 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1693 }
1694
1695 free(d);
1696 }
1697
1698 static void event_gc_inode_data(
1699 sd_event *e,
1700 struct inode_data *d) {
1701
1702 struct inotify_data *inotify_data;
1703
1704 assert(e);
1705
1706 if (!d)
1707 return;
1708
1709 if (d->event_sources)
1710 return;
1711
1712 inotify_data = d->inotify_data;
1713 event_free_inode_data(e, d);
1714
1715 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1716 event_free_inotify_data(e, inotify_data);
1717 }
1718
1719 static int event_make_inode_data(
1720 sd_event *e,
1721 struct inotify_data *inotify_data,
1722 dev_t dev,
1723 ino_t ino,
1724 struct inode_data **ret) {
1725
1726 struct inode_data *d, key;
1727 int r;
1728
1729 assert(e);
1730 assert(inotify_data);
1731
1732 key = (struct inode_data) {
1733 .ino = ino,
1734 .dev = dev,
1735 };
1736
1737 d = hashmap_get(inotify_data->inodes, &key);
1738 if (d) {
1739 if (ret)
1740 *ret = d;
1741
1742 return 0;
1743 }
1744
1745 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1746 if (r < 0)
1747 return r;
1748
1749 d = new(struct inode_data, 1);
1750 if (!d)
1751 return -ENOMEM;
1752
1753 *d = (struct inode_data) {
1754 .dev = dev,
1755 .ino = ino,
1756 .wd = -1,
1757 .fd = -1,
1758 .inotify_data = inotify_data,
1759 };
1760
1761 r = hashmap_put(inotify_data->inodes, d, d);
1762 if (r < 0) {
1763 free(d);
1764 return r;
1765 }
1766
1767 if (ret)
1768 *ret = d;
1769
1770 return 1;
1771 }
1772
1773 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1774 bool excl_unlink = true;
1775 uint32_t combined = 0;
1776 sd_event_source *s;
1777
1778 assert(d);
1779
1780 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1781 * the IN_EXCL_UNLINK flag is ANDed instead.
1782 *
1783 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1784 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1785 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1786 * events we don't care for client-side. */
1787
1788 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1789
1790 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1791 excl_unlink = false;
1792
1793 combined |= s->inotify.mask;
1794 }
1795
1796 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1797 }
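/* A worked example of the combination rule above, with two hypothetical sources on the same inode: one
 * watching IN_CREATE|IN_EXCL_UNLINK and one watching IN_DELETE (without IN_EXCL_UNLINK). The plain flags are
 * ORed into IN_CREATE|IN_DELETE, but since not every source requested IN_EXCL_UNLINK that flag is dropped,
 * yielding IN_CREATE|IN_DELETE as the mask actually set on the watch descriptor. */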
1798
1799 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1800 uint32_t combined_mask;
1801 int wd, r;
1802
1803 assert(d);
1804 assert(d->fd >= 0);
1805
1806 combined_mask = inode_data_determine_mask(d);
1807
1808 if (d->wd >= 0 && combined_mask == d->combined_mask)
1809 return 0;
1810
1811 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1812 if (r < 0)
1813 return r;
1814
1815 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1816 if (wd < 0)
1817 return -errno;
1818
1819 if (d->wd < 0) {
1820 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1821 if (r < 0) {
1822 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1823 return r;
1824 }
1825
1826 d->wd = wd;
1827
1828 } else if (d->wd != wd) {
1829
1830 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1831 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1832 return -EINVAL;
1833 }
1834
1835 d->combined_mask = combined_mask;
1836 return 1;
1837 }
1838
1839 _public_ int sd_event_add_inotify(
1840 sd_event *e,
1841 sd_event_source **ret,
1842 const char *path,
1843 uint32_t mask,
1844 sd_event_inotify_handler_t callback,
1845 void *userdata) {
1846
1847 bool rm_inotify = false, rm_inode = false;
1848 struct inotify_data *inotify_data = NULL;
1849 struct inode_data *inode_data = NULL;
1850 _cleanup_close_ int fd = -1;
1851 sd_event_source *s;
1852 struct stat st;
1853 int r;
1854
1855 assert_return(e, -EINVAL);
1856 assert_return(e = event_resolve(e), -ENOPKG);
1857 assert_return(path, -EINVAL);
1858 assert_return(callback, -EINVAL);
1859 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1860 assert_return(!event_pid_changed(e), -ECHILD);
1861
1862 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1863 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1864 * the caller can't use them themselves. */
1865 if (mask & IN_MASK_ADD)
1866 return -EINVAL;
1867
1868 fd = open(path, O_PATH|O_CLOEXEC|
1869 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1870 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1871 if (fd < 0)
1872 return -errno;
1873
1874 if (fstat(fd, &st) < 0)
1875 return -errno;
1876
1877 s = source_new(e, !ret, SOURCE_INOTIFY);
1878 if (!s)
1879 return -ENOMEM;
1880
1881 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1882 s->inotify.mask = mask;
1883 s->inotify.callback = callback;
1884 s->userdata = userdata;
1885
1886 /* Allocate an inotify object for this priority, and an inode object within it */
1887 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1888 if (r < 0)
1889 goto fail;
1890 rm_inotify = r > 0;
1891
1892 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1893 if (r < 0)
1894 goto fail;
1895 rm_inode = r > 0;
1896
1897 /* Keep the O_PATH fd around until the first iteration of the loop, so that until then we can still change the
1898 * priority of the event source, for which we need the original inode. */
1899 if (inode_data->fd < 0) {
1900 inode_data->fd = TAKE_FD(fd);
1901 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1902 }
1903
1904 /* Link our event source to the inode data object */
1905 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1906 s->inotify.inode_data = inode_data;
1907
1908 rm_inode = rm_inotify = false;
1909
1910 /* Actually realize the watch now */
1911 r = inode_data_realize_watch(e, inode_data);
1912 if (r < 0)
1913 goto fail;
1914
1915 (void) sd_event_source_set_description(s, path);
1916
1917 if (ret)
1918 *ret = s;
1919
1920 return 0;
1921
1922 fail:
1923 source_free(s);
1924
1925 if (rm_inode)
1926 event_free_inode_data(e, inode_data);
1927
1928 if (rm_inotify)
1929 event_free_inotify_data(e, inotify_data);
1930
1931 return r;
1932 }
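/* A usage sketch for sd_event_add_inotify(); the handler signature follows sd_event_inotify_handler_t from
 * sd-event.h, and "/run/foobar" is an illustrative path:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->mask & IN_CLOSE_WRITE)
 *                     ... re-read the file ...
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &source, "/run/foobar", IN_CLOSE_WRITE|IN_MOVED_TO, on_inotify, userdata);
 */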
1933
1934 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1935
1936 if (!s)
1937 return NULL;
1938
1939 assert(s->n_ref >= 1);
1940 s->n_ref++;
1941
1942 return s;
1943 }
1944
1945 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1946
1947 if (!s)
1948 return NULL;
1949
1950 assert(s->n_ref >= 1);
1951 s->n_ref--;
1952
1953 if (s->n_ref <= 0) {
1954 /* Here's a special hack: when we are called from a
1955 * dispatch handler we won't free the event source
1956 * immediately, but we will detach the fd from the
1957 * epoll. This way it is safe for the caller to unref
1958 * the event source and immediately close the fd, but
1959 * we still retain a valid event source object after
1960 * the callback. */
1961
1962 if (s->dispatching) {
1963 if (s->type == SOURCE_IO)
1964 source_io_unregister(s);
1965
1966 source_disconnect(s);
1967 } else
1968 source_free(s);
1969 }
1970
1971 return NULL;
1972 }
1973
1974 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1975 assert_return(s, -EINVAL);
1976 assert_return(!event_pid_changed(s->event), -ECHILD);
1977
1978 return free_and_strdup(&s->description, description);
1979 }
1980
1981 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1982 assert_return(s, -EINVAL);
1983 assert_return(description, -EINVAL);
1984 assert_return(s->description, -ENXIO);
1985 assert_return(!event_pid_changed(s->event), -ECHILD);
1986
1987 *description = s->description;
1988 return 0;
1989 }
1990
1991 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1992 assert_return(s, NULL);
1993
1994 return s->event;
1995 }
1996
1997 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1998 assert_return(s, -EINVAL);
1999 assert_return(s->type != SOURCE_EXIT, -EDOM);
2000 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2001 assert_return(!event_pid_changed(s->event), -ECHILD);
2002
2003 return s->pending;
2004 }
2005
2006 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2007 assert_return(s, -EINVAL);
2008 assert_return(s->type == SOURCE_IO, -EDOM);
2009 assert_return(!event_pid_changed(s->event), -ECHILD);
2010
2011 return s->io.fd;
2012 }
2013
2014 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2015 int r;
2016
2017 assert_return(s, -EINVAL);
2018 assert_return(fd >= 0, -EBADF);
2019 assert_return(s->type == SOURCE_IO, -EDOM);
2020 assert_return(!event_pid_changed(s->event), -ECHILD);
2021
2022 if (s->io.fd == fd)
2023 return 0;
2024
2025 if (s->enabled == SD_EVENT_OFF) {
2026 s->io.fd = fd;
2027 s->io.registered = false;
2028 } else {
2029 int saved_fd;
2030
2031 saved_fd = s->io.fd;
2032 assert(s->io.registered);
2033
2034 s->io.fd = fd;
2035 s->io.registered = false;
2036
2037 r = source_io_register(s, s->enabled, s->io.events);
2038 if (r < 0) {
2039 s->io.fd = saved_fd;
2040 s->io.registered = true;
2041 return r;
2042 }
2043
2044 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2045 }
2046
2047 return 0;
2048 }
2049
2050 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2051 assert_return(s, -EINVAL);
2052 assert_return(s->type == SOURCE_IO, -EDOM);
2053
2054 return s->io.owned;
2055 }
2056
2057 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2058 assert_return(s, -EINVAL);
2059 assert_return(s->type == SOURCE_IO, -EDOM);
2060
2061 s->io.owned = own;
2062 return 0;
2063 }
2064
2065 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2066 assert_return(s, -EINVAL);
2067 assert_return(events, -EINVAL);
2068 assert_return(s->type == SOURCE_IO, -EDOM);
2069 assert_return(!event_pid_changed(s->event), -ECHILD);
2070
2071 *events = s->io.events;
2072 return 0;
2073 }
2074
2075 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2076 int r;
2077
2078 assert_return(s, -EINVAL);
2079 assert_return(s->type == SOURCE_IO, -EDOM);
2080 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2081 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2082 assert_return(!event_pid_changed(s->event), -ECHILD);
2083
2084 /* edge-triggered updates are never skipped, so we can reset edges */
2085 if (s->io.events == events && !(events & EPOLLET))
2086 return 0;
2087
2088 r = source_set_pending(s, false);
2089 if (r < 0)
2090 return r;
2091
2092 if (s->enabled != SD_EVENT_OFF) {
2093 r = source_io_register(s, s->enabled, events);
2094 if (r < 0)
2095 return r;
2096 }
2097
2098 s->io.events = events;
2099
2100 return 0;
2101 }
2102
2103 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2104 assert_return(s, -EINVAL);
2105 assert_return(revents, -EINVAL);
2106 assert_return(s->type == SOURCE_IO, -EDOM);
2107 assert_return(s->pending, -ENODATA);
2108 assert_return(!event_pid_changed(s->event), -ECHILD);
2109
2110 *revents = s->io.revents;
2111 return 0;
2112 }
2113
2114 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2115 assert_return(s, -EINVAL);
2116 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2117 assert_return(!event_pid_changed(s->event), -ECHILD);
2118
2119 return s->signal.sig;
2120 }
2121
2122 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2123 assert_return(s, -EINVAL);
2124 assert_return(!event_pid_changed(s->event), -ECHILD);
2125
2126 *priority = s->priority;
2127 return 0;
2128 }
2129
2130 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2131 bool rm_inotify = false, rm_inode = false;
2132 struct inotify_data *new_inotify_data = NULL;
2133 struct inode_data *new_inode_data = NULL;
2134 int r;
2135
2136 assert_return(s, -EINVAL);
2137 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2138 assert_return(!event_pid_changed(s->event), -ECHILD);
2139
2140 if (s->priority == priority)
2141 return 0;
2142
2143 if (s->type == SOURCE_INOTIFY) {
2144 struct inode_data *old_inode_data;
2145
2146 assert(s->inotify.inode_data);
2147 old_inode_data = s->inotify.inode_data;
2148
2149 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2150 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2151 * events we allow priority changes only until the first following iteration. */
2152 if (old_inode_data->fd < 0)
2153 return -EOPNOTSUPP;
2154
2155 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2156 if (r < 0)
2157 return r;
2158 rm_inotify = r > 0;
2159
2160 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2161 if (r < 0)
2162 goto fail;
2163 rm_inode = r > 0;
2164
2165 if (new_inode_data->fd < 0) {
2166 /* Duplicate the fd for the new inode object if we don't have any yet */
2167 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2168 if (new_inode_data->fd < 0) {
2169 r = -errno;
2170 goto fail;
2171 }
2172
2173 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2174 }
2175
2176 /* Move the event source to the new inode data structure */
2177 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2178 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2179 s->inotify.inode_data = new_inode_data;
2180
2181 /* Now create the new watch */
2182 r = inode_data_realize_watch(s->event, new_inode_data);
2183 if (r < 0) {
2184 /* Move it back */
2185 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2186 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2187 s->inotify.inode_data = old_inode_data;
2188 goto fail;
2189 }
2190
2191 s->priority = priority;
2192
2193 event_gc_inode_data(s->event, old_inode_data);
2194
2195 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2196 struct signal_data *old, *d;
2197
2198 /* Move us from the signalfd belonging to the old
2199 * priority to the signalfd of the new priority */
2200
2201 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2202
2203 s->priority = priority;
2204
2205 r = event_make_signal_data(s->event, s->signal.sig, &d);
2206 if (r < 0) {
2207 s->priority = old->priority;
2208 return r;
2209 }
2210
2211 event_unmask_signal_data(s->event, old, s->signal.sig);
2212 } else
2213 s->priority = priority;
2214
2215 if (s->pending)
2216 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2217
2218 if (s->prepare)
2219 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2220
2221 if (s->type == SOURCE_EXIT)
2222 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2223
2224 return 0;
2225
2226 fail:
2227 if (rm_inode)
2228 event_free_inode_data(s->event, new_inode_data);
2229
2230 if (rm_inotify)
2231 event_free_inotify_data(s->event, new_inotify_data);
2232
2233 return r;
2234 }
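/* Illustrative sketch, not part of the original file: lowering the priority of a signal
 * source so that it is dispatched after "normal" sources. SD_EVENT_PRIORITY_IDLE is part
 * of the public API; the source "s" is assumed to have been created earlier. Note that,
 * as implemented above, an inotify source can only be re-prioritized until the event loop
 * iteration following its creation. */
static int demote_signal_source(sd_event_source *s) {
        return sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
}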
2235
2236 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2237 assert_return(s, -EINVAL);
2238 assert_return(m, -EINVAL);
2239 assert_return(!event_pid_changed(s->event), -ECHILD);
2240
2241 *m = s->enabled;
2242 return 0;
2243 }
2244
2245 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2246 int r;
2247
2248 assert_return(s, -EINVAL);
2249 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2250 assert_return(!event_pid_changed(s->event), -ECHILD);
2251
2252 /* If we are dead anyway, we are fine with turning off
2253 * sources, but everything else needs to fail. */
2254 if (s->event->state == SD_EVENT_FINISHED)
2255 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2256
2257 if (s->enabled == m)
2258 return 0;
2259
2260 if (m == SD_EVENT_OFF) {
2261
2262 /* Unset the pending flag when this event source is disabled */
2263 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2264 r = source_set_pending(s, false);
2265 if (r < 0)
2266 return r;
2267 }
2268
2269 switch (s->type) {
2270
2271 case SOURCE_IO:
2272 source_io_unregister(s);
2273 s->enabled = m;
2274 break;
2275
2276 case SOURCE_TIME_REALTIME:
2277 case SOURCE_TIME_BOOTTIME:
2278 case SOURCE_TIME_MONOTONIC:
2279 case SOURCE_TIME_REALTIME_ALARM:
2280 case SOURCE_TIME_BOOTTIME_ALARM: {
2281 struct clock_data *d;
2282
2283 s->enabled = m;
2284 d = event_get_clock_data(s->event, s->type);
2285 assert(d);
2286
2287 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2288 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2289 d->needs_rearm = true;
2290 break;
2291 }
2292
2293 case SOURCE_SIGNAL:
2294 s->enabled = m;
2295
2296 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2297 break;
2298
2299 case SOURCE_CHILD:
2300 s->enabled = m;
2301
2302 assert(s->event->n_enabled_child_sources > 0);
2303 s->event->n_enabled_child_sources--;
2304
2305 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2306 break;
2307
2308 case SOURCE_EXIT:
2309 s->enabled = m;
2310 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2311 break;
2312
2313 case SOURCE_DEFER:
2314 case SOURCE_POST:
2315 case SOURCE_INOTIFY:
2316 s->enabled = m;
2317 break;
2318
2319 default:
2320 assert_not_reached("Wut? I shouldn't exist.");
2321 }
2322
2323 } else {
2324
2325 /* Unset the pending flag when this event source is enabled */
2326 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2327 r = source_set_pending(s, false);
2328 if (r < 0)
2329 return r;
2330 }
2331
2332 switch (s->type) {
2333
2334 case SOURCE_IO:
2335 r = source_io_register(s, m, s->io.events);
2336 if (r < 0)
2337 return r;
2338
2339 s->enabled = m;
2340 break;
2341
2342 case SOURCE_TIME_REALTIME:
2343 case SOURCE_TIME_BOOTTIME:
2344 case SOURCE_TIME_MONOTONIC:
2345 case SOURCE_TIME_REALTIME_ALARM:
2346 case SOURCE_TIME_BOOTTIME_ALARM: {
2347 struct clock_data *d;
2348
2349 s->enabled = m;
2350 d = event_get_clock_data(s->event, s->type);
2351 assert(d);
2352
2353 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2354 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2355 d->needs_rearm = true;
2356 break;
2357 }
2358
2359 case SOURCE_SIGNAL:
2360
2361 s->enabled = m;
2362
2363 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2364 if (r < 0) {
2365 s->enabled = SD_EVENT_OFF;
2366 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2367 return r;
2368 }
2369
2370 break;
2371
2372 case SOURCE_CHILD:
2373
2374 if (s->enabled == SD_EVENT_OFF)
2375 s->event->n_enabled_child_sources++;
2376
2377 s->enabled = m;
2378
2379 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2380 if (r < 0) {
2381 s->enabled = SD_EVENT_OFF;
2382 s->event->n_enabled_child_sources--;
2383 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2384 return r;
2385 }
2386
2387 break;
2388
2389 case SOURCE_EXIT:
2390 s->enabled = m;
2391 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2392 break;
2393
2394 case SOURCE_DEFER:
2395 case SOURCE_POST:
2396 case SOURCE_INOTIFY:
2397 s->enabled = m;
2398 break;
2399
2400 default:
2401 assert_not_reached("Wut? I shouldn't exist.");
2402 }
2403 }
2404
2405 if (s->pending)
2406 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2407
2408 if (s->prepare)
2409 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2410
2411 return 0;
2412 }
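/* Illustrative sketch, not part of the original file: re-enabling a paused source for a
 * single dispatch with sd_event_source_set_enabled(). After that one dispatch the loop
 * (see source_dispatch() below) switches an SD_EVENT_ONESHOT source back to SD_EVENT_OFF
 * automatically. The helper name is hypothetical. */
static int fire_once_more(sd_event_source *s) {
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}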
2413
2414 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2415 assert_return(s, -EINVAL);
2416 assert_return(usec, -EINVAL);
2417 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2418 assert_return(!event_pid_changed(s->event), -ECHILD);
2419
2420 *usec = s->time.next;
2421 return 0;
2422 }
2423
2424 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2425 struct clock_data *d;
2426 int r;
2427
2428 assert_return(s, -EINVAL);
2429 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2430 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2431 assert_return(!event_pid_changed(s->event), -ECHILD);
2432
2433 r = source_set_pending(s, false);
2434 if (r < 0)
2435 return r;
2436
2437 s->time.next = usec;
2438
2439 d = event_get_clock_data(s->event, s->type);
2440 assert(d);
2441
2442 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2443 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2444 d->needs_rearm = true;
2445
2446 return 0;
2447 }
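/* Illustrative sketch, not part of the original file: a common pattern for a periodic
 * timer is to re-arm the source from its own callback, adding an interval to the loop's
 * cached timestamp obtained via sd_event_now(). This assumes the source was created on
 * CLOCK_MONOTONIC; the 5-second interval and the callback name are hypothetical. */
static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        r = sd_event_source_set_time(s, now_usec + 5 * USEC_PER_SEC);
        if (r < 0)
                return r;

        /* Timer sources are created in SD_EVENT_ONESHOT mode, hence re-enable for the next round */
        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}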
2448
2449 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2450 assert_return(s, -EINVAL);
2451 assert_return(usec, -EINVAL);
2452 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2453 assert_return(!event_pid_changed(s->event), -ECHILD);
2454
2455 *usec = s->time.accuracy;
2456 return 0;
2457 }
2458
2459 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2460 struct clock_data *d;
2461 int r;
2462
2463 assert_return(s, -EINVAL);
2464 assert_return(usec != (uint64_t) -1, -EINVAL);
2465 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2466 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2467 assert_return(!event_pid_changed(s->event), -ECHILD);
2468
2469 r = source_set_pending(s, false);
2470 if (r < 0)
2471 return r;
2472
2473 if (usec == 0)
2474 usec = DEFAULT_ACCURACY_USEC;
2475
2476 s->time.accuracy = usec;
2477
2478 d = event_get_clock_data(s->event, s->type);
2479 assert(d);
2480
2481 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2482 d->needs_rearm = true;
2483
2484 return 0;
2485 }
2486
2487 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2488 assert_return(s, -EINVAL);
2489 assert_return(clock, -EINVAL);
2490 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2491 assert_return(!event_pid_changed(s->event), -ECHILD);
2492
2493 *clock = event_source_type_to_clock(s->type);
2494 return 0;
2495 }
2496
2497 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2498 assert_return(s, -EINVAL);
2499 assert_return(pid, -EINVAL);
2500 assert_return(s->type == SOURCE_CHILD, -EDOM);
2501 assert_return(!event_pid_changed(s->event), -ECHILD);
2502
2503 *pid = s->child.pid;
2504 return 0;
2505 }
2506
2507 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2508 assert_return(s, -EINVAL);
2509 assert_return(mask, -EINVAL);
2510 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2511 assert_return(!event_pid_changed(s->event), -ECHILD);
2512
2513 *mask = s->inotify.mask;
2514 return 0;
2515 }
2516
2517 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2518 int r;
2519
2520 assert_return(s, -EINVAL);
2521 assert_return(s->type != SOURCE_EXIT, -EDOM);
2522 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2523 assert_return(!event_pid_changed(s->event), -ECHILD);
2524
2525 if (s->prepare == callback)
2526 return 0;
2527
2528 if (callback && s->prepare) {
2529 s->prepare = callback;
2530 return 0;
2531 }
2532
2533 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2534 if (r < 0)
2535 return r;
2536
2537 s->prepare = callback;
2538
2539 if (callback) {
2540 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2541 if (r < 0)
2542 return r;
2543 } else
2544 prioq_remove(s->event->prepare, s, &s->prepare_index);
2545
2546 return 0;
2547 }
2548
2549 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2550 assert_return(s, NULL);
2551
2552 return s->userdata;
2553 }
2554
2555 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2556 void *ret;
2557
2558 assert_return(s, NULL);
2559
2560 ret = s->userdata;
2561 s->userdata = userdata;
2562
2563 return ret;
2564 }
2565
2566 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2567 usec_t c;
2568 assert(e);
2569 assert(a <= b);
2570
2571 if (a <= 0)
2572 return 0;
2573 if (a >= USEC_INFINITY)
2574 return USEC_INFINITY;
2575
2576 if (b <= a + 1)
2577 return a;
2578
2579 initialize_perturb(e);
2580
2581 /*
2582 Find a good time to wake up again between times a and b. We
2583 have two goals here:
2584
2585 a) We want to wake up as seldom as possible, hence prefer
2586 later times over earlier times.
2587
2588 b) But if we have to wake up, then let's make sure to
2589 dispatch as much as possible on the entire system.
2590
2591 We implement this by waking up everywhere at the same time
2592 within any given minute if we can, synchronised via the
2593 perturbation value determined from the boot ID. If we can't,
2594 then we try to find the same spot within every 10s, then 1s and
2595 then 250ms interval. Otherwise, we pick the last possible time
2596 to wake up.
2597 */
2598
2599 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2600 if (c >= b) {
2601 if (_unlikely_(c < USEC_PER_MINUTE))
2602 return b;
2603
2604 c -= USEC_PER_MINUTE;
2605 }
2606
2607 if (c >= a)
2608 return c;
2609
2610 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2611 if (c >= b) {
2612 if (_unlikely_(c < USEC_PER_SEC*10))
2613 return b;
2614
2615 c -= USEC_PER_SEC*10;
2616 }
2617
2618 if (c >= a)
2619 return c;
2620
2621 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2622 if (c >= b) {
2623 if (_unlikely_(c < USEC_PER_SEC))
2624 return b;
2625
2626 c -= USEC_PER_SEC;
2627 }
2628
2629 if (c >= a)
2630 return c;
2631
2632 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2633 if (c >= b) {
2634 if (_unlikely_(c < USEC_PER_MSEC*250))
2635 return b;
2636
2637 c -= USEC_PER_MSEC*250;
2638 }
2639
2640 if (c >= a)
2641 return c;
2642
2643 return b;
2644 }
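/* Worked example for the coalescing logic above (illustrative, values hypothetical): with
 * e->perturb = 17 s and a wake-up window of a = 95 s, b = 130 s (all in microseconds), the
 * per-minute attempt yields c = 120 s + 17 s = 137 s >= b, minus one minute = 77 s, which
 * is below a and hence rejected. The 10 s attempt yields c = 130 s + (17 % 10) s = 137 s
 * >= b, minus 10 s = 127 s, which lies within [a, b], so the loop wakes at the 127 s mark,
 * i.e. the latest perturbed 10 s boundary inside the window. */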
2645
2646 static int event_arm_timer(
2647 sd_event *e,
2648 struct clock_data *d) {
2649
2650 struct itimerspec its = {};
2651 sd_event_source *a, *b;
2652 usec_t t;
2653 int r;
2654
2655 assert(e);
2656 assert(d);
2657
2658 if (!d->needs_rearm)
2659 return 0;
2660 else
2661 d->needs_rearm = false;
2662
2663 a = prioq_peek(d->earliest);
2664 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2665
2666 if (d->fd < 0)
2667 return 0;
2668
2669 if (d->next == USEC_INFINITY)
2670 return 0;
2671
2672 /* disarm */
2673 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2674 if (r < 0)
2675 return -errno;
2676
2677 d->next = USEC_INFINITY;
2678 return 0;
2679 }
2680
2681 b = prioq_peek(d->latest);
2682 assert_se(b && b->enabled != SD_EVENT_OFF);
2683
2684 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2685 if (d->next == t)
2686 return 0;
2687
2688 assert_se(d->fd >= 0);
2689
2690 if (t == 0) {
2691 /* We don't want to disarm here, just arm the timer for some time looooong ago. */
2692 its.it_value.tv_sec = 0;
2693 its.it_value.tv_nsec = 1;
2694 } else
2695 timespec_store(&its.it_value, t);
2696
2697 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2698 if (r < 0)
2699 return -errno;
2700
2701 d->next = t;
2702 return 0;
2703 }
2704
2705 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2706 assert(e);
2707 assert(s);
2708 assert(s->type == SOURCE_IO);
2709
2710 /* If the event source was already pending, we just OR in the
2711 * new revents, otherwise we reset the value. The ORing is
2712 * necessary to handle EPOLLONESHOT events properly where
2713 * readability might happen independently of writability, and
2714 * we need to keep track of both */
2715
2716 if (s->pending)
2717 s->io.revents |= revents;
2718 else
2719 s->io.revents = revents;
2720
2721 return source_set_pending(s, true);
2722 }
2723
2724 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2725 uint64_t x;
2726 ssize_t ss;
2727
2728 assert(e);
2729 assert(fd >= 0);
2730
2731 assert_return(events == EPOLLIN, -EIO);
2732
2733 ss = read(fd, &x, sizeof(x));
2734 if (ss < 0) {
2735 if (IN_SET(errno, EAGAIN, EINTR))
2736 return 0;
2737
2738 return -errno;
2739 }
2740
2741 if (_unlikely_(ss != sizeof(x)))
2742 return -EIO;
2743
2744 if (next)
2745 *next = USEC_INFINITY;
2746
2747 return 0;
2748 }
2749
2750 static int process_timer(
2751 sd_event *e,
2752 usec_t n,
2753 struct clock_data *d) {
2754
2755 sd_event_source *s;
2756 int r;
2757
2758 assert(e);
2759 assert(d);
2760
2761 for (;;) {
2762 s = prioq_peek(d->earliest);
2763 if (!s ||
2764 s->time.next > n ||
2765 s->enabled == SD_EVENT_OFF ||
2766 s->pending)
2767 break;
2768
2769 r = source_set_pending(s, true);
2770 if (r < 0)
2771 return r;
2772
2773 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2774 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2775 d->needs_rearm = true;
2776 }
2777
2778 return 0;
2779 }
2780
2781 static int process_child(sd_event *e) {
2782 sd_event_source *s;
2783 Iterator i;
2784 int r;
2785
2786 assert(e);
2787
2788 e->need_process_child = false;
2789
2790 /*
2791 So, this is ugly. We iteratively invoke waitid() with P_PID
2792 + WNOHANG for each PID we wait for, instead of using
2793 P_ALL. This is because we only want to get child
2794 information of very specific child processes, and not all
2795 of them. We might not have processed the SIGCHLD event of a
2796 previous invocation and we don't want to maintain an
2797 unbounded *per-child* event queue, hence we really don't
2798 want anything flushed out of the kernel's queue that we
2799 don't care about. Since this is O(n) this means that if you
2800 have a lot of processes you probably want to handle SIGCHLD
2801 yourself.
2802
2803 We do not reap the children here (by using WNOWAIT), this
2804 is only done after the event source is dispatched so that
2805 the callback still sees the process as a zombie.
2806 */
2807
2808 HASHMAP_FOREACH(s, e->child_sources, i) {
2809 assert(s->type == SOURCE_CHILD);
2810
2811 if (s->pending)
2812 continue;
2813
2814 if (s->enabled == SD_EVENT_OFF)
2815 continue;
2816
2817 zero(s->child.siginfo);
2818 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2819 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2820 if (r < 0)
2821 return -errno;
2822
2823 if (s->child.siginfo.si_pid != 0) {
2824 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2825
2826 if (!zombie && (s->child.options & WEXITED)) {
2827 /* If the child isn't dead then let's
2828 * immediately remove the state change
2829 * from the queue, since there's no
2830 * benefit in leaving it queued */
2831
2832 assert(s->child.options & (WSTOPPED|WCONTINUED));
2833 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2834 }
2835
2836 r = source_set_pending(s, true);
2837 if (r < 0)
2838 return r;
2839 }
2840 }
2841
2842 return 0;
2843 }
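/* Illustrative sketch, not part of the original file: watching a forked child with
 * sd_event_add_child(). SIGCHLD must be blocked in all threads before the source is
 * added, since the loop collects the state change via a signalfd rather than an
 * asynchronous handler; <signal.h> is assumed to be available. Callback and helper names
 * are hypothetical. */
static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* At this point the child is still a zombie (WNOWAIT above); it is reaped right
         * after this callback returns, see source_dispatch() below. */
        return 0;
}

static int watch_child(sd_event *e, pid_t pid, sd_event_source **ret) {
        sigset_t ss;

        /* Block SIGCHLD so that it is delivered through the loop's signalfd only */
        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGCHLD) >= 0);
        assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);

        return sd_event_add_child(e, ret, pid, WEXITED, on_child_exit, NULL);
}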
2844
2845 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2846 bool read_one = false;
2847 int r;
2848
2849 assert(e);
2850 assert(d);
2851 assert_return(events == EPOLLIN, -EIO);
2852
2853 /* If there's a signal queued on this priority and SIGCHLD is
2854 on this priority too, then make sure to recheck the
2855 children we watch. This is because we only ever dequeue
2856 the first signal per priority, and if SIGCHLD is enqueued
2857 after the one we dequeued we wouldn't notice it, even though
2858 we might have higher-priority children we care about. Hence
2859 we need to check for them explicitly. */
2860
2861 if (sigismember(&d->sigset, SIGCHLD))
2862 e->need_process_child = true;
2863
2864 /* If there's already an event source pending for this
2865 * priority we don't read another */
2866 if (d->current)
2867 return 0;
2868
2869 for (;;) {
2870 struct signalfd_siginfo si;
2871 ssize_t n;
2872 sd_event_source *s = NULL;
2873
2874 n = read(d->fd, &si, sizeof(si));
2875 if (n < 0) {
2876 if (IN_SET(errno, EAGAIN, EINTR))
2877 return read_one;
2878
2879 return -errno;
2880 }
2881
2882 if (_unlikely_(n != sizeof(si)))
2883 return -EIO;
2884
2885 assert(SIGNAL_VALID(si.ssi_signo));
2886
2887 read_one = true;
2888
2889 if (e->signal_sources)
2890 s = e->signal_sources[si.ssi_signo];
2891 if (!s)
2892 continue;
2893 if (s->pending)
2894 continue;
2895
2896 s->signal.siginfo = si;
2897 d->current = s;
2898
2899 r = source_set_pending(s, true);
2900 if (r < 0)
2901 return r;
2902
2903 return 1;
2904 }
2905 }
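/* Illustrative sketch, not part of the original file: routing SIGTERM through the loop.
 * As with child sources, the signal must be blocked first so that it is delivered via the
 * signalfd managed by event_make_signal_data() rather than an asynchronous handler.
 * Passing NULL as the source pointer makes the source "floating", i.e. owned by the loop;
 * the handler name is hypothetical. */
static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Ask the loop to run its exit sources and then terminate */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int setup_sigterm(sd_event *e) {
        sigset_t ss;

        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGTERM) >= 0);
        assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);

        return sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
}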
2906
2907 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2908 ssize_t n;
2909
2910 assert(e);
2911 assert(d);
2912
2913 assert_return(revents == EPOLLIN, -EIO);
2914
2915 /* If there's already an event source pending for this priority, don't read another */
2916 if (d->n_pending > 0)
2917 return 0;
2918
2919 /* Is the read buffer non-empty? If so, let's not read more */
2920 if (d->buffer_filled > 0)
2921 return 0;
2922
2923 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2924 if (n < 0) {
2925 if (IN_SET(errno, EAGAIN, EINTR))
2926 return 0;
2927
2928 return -errno;
2929 }
2930
2931 assert(n > 0);
2932 d->buffer_filled = (size_t) n;
2933 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2934
2935 return 1;
2936 }
2937
2938 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2939 assert(e);
2940 assert(d);
2941 assert(sz <= d->buffer_filled);
2942
2943 if (sz == 0)
2944 return;
2945
2946 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2947 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2948 d->buffer_filled -= sz;
2949
2950 if (d->buffer_filled == 0)
2951 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2952 }
2953
2954 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2955 int r;
2956
2957 assert(e);
2958 assert(d);
2959
2960 /* If there's already an event source pending for this priority, don't read another */
2961 if (d->n_pending > 0)
2962 return 0;
2963
2964 while (d->buffer_filled > 0) {
2965 size_t sz;
2966
2967 /* Let's validate that the event structures are complete */
2968 if (d->buffer_filled < offsetof(struct inotify_event, name))
2969 return -EIO;
2970
2971 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2972 if (d->buffer_filled < sz)
2973 return -EIO;
2974
2975 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2976 struct inode_data *inode_data;
2977 Iterator i;
2978
2979 /* The queue overran, let's pass this event to all event sources connected to this inotify
2980 * object */
2981
2982 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2983 sd_event_source *s;
2984
2985 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2986
2987 if (s->enabled == SD_EVENT_OFF)
2988 continue;
2989
2990 r = source_set_pending(s, true);
2991 if (r < 0)
2992 return r;
2993 }
2994 }
2995 } else {
2996 struct inode_data *inode_data;
2997 sd_event_source *s;
2998
2999 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
3000 * our watch descriptor table. */
3001 if (d->buffer.ev.mask & IN_IGNORED) {
3002
3003 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3004 if (!inode_data) {
3005 event_inotify_data_drop(e, d, sz);
3006 continue;
3007 }
3008
3009 /* The watch descriptor was removed by the kernel, let's drop it here too */
3010 inode_data->wd = -1;
3011 } else {
3012 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3013 if (!inode_data) {
3014 event_inotify_data_drop(e, d, sz);
3015 continue;
3016 }
3017 }
3018
3019 /* Trigger all event sources that are interested in these events. Also trigger all event
3020 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3021 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3022
3023 if (s->enabled == SD_EVENT_OFF)
3024 continue;
3025
3026 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3027 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3028 continue;
3029
3030 r = source_set_pending(s, true);
3031 if (r < 0)
3032 return r;
3033 }
3034 }
3035
3036 /* Something pending now? If so, let's finish, otherwise let's read more. */
3037 if (d->n_pending > 0)
3038 return 1;
3039 }
3040
3041 return 0;
3042 }
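/* Illustrative sketch, not part of the original file: watching a directory for newly
 * created files with sd_event_add_inotify(). The loop multiplexes all watches of one
 * priority over a single inotify fd (see struct inotify_data above); IN_IGNORED and
 * IN_UNMOUNT are always propagated to the callback, as implemented above. The path and
 * handler names are hypothetical. */
static int on_dir_event(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->mask & IN_Q_OVERFLOW)
                return 0; /* Queue overran: rescan the directory out of band if needed */

        /* ev->name is only valid if ev->len > 0 */
        return 0;
}

static int watch_directory(sd_event *e) {
        return sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_MOVED_TO, on_dir_event, NULL);
}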
3043
3044 static int process_inotify(sd_event *e) {
3045 struct inotify_data *d;
3046 int r, done = 0;
3047
3048 assert(e);
3049
3050 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3051 r = event_inotify_data_process(e, d);
3052 if (r < 0)
3053 return r;
3054 if (r > 0)
3055 done++;
3056 }
3057
3058 return done;
3059 }
3060
3061 static int source_dispatch(sd_event_source *s) {
3062 EventSourceType saved_type;
3063 int r = 0;
3064
3065 assert(s);
3066 assert(s->pending || s->type == SOURCE_EXIT);
3067
3068 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
3069 * the event. */
3070 saved_type = s->type;
3071
3072 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3073 r = source_set_pending(s, false);
3074 if (r < 0)
3075 return r;
3076 }
3077
3078 if (s->type != SOURCE_POST) {
3079 sd_event_source *z;
3080 Iterator i;
3081
3082 /* If we execute a non-post source, let's mark all
3083 * post sources as pending */
3084
3085 SET_FOREACH(z, s->event->post_sources, i) {
3086 if (z->enabled == SD_EVENT_OFF)
3087 continue;
3088
3089 r = source_set_pending(z, true);
3090 if (r < 0)
3091 return r;
3092 }
3093 }
3094
3095 if (s->enabled == SD_EVENT_ONESHOT) {
3096 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3097 if (r < 0)
3098 return r;
3099 }
3100
3101 s->dispatching = true;
3102
3103 switch (s->type) {
3104
3105 case SOURCE_IO:
3106 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3107 break;
3108
3109 case SOURCE_TIME_REALTIME:
3110 case SOURCE_TIME_BOOTTIME:
3111 case SOURCE_TIME_MONOTONIC:
3112 case SOURCE_TIME_REALTIME_ALARM:
3113 case SOURCE_TIME_BOOTTIME_ALARM:
3114 r = s->time.callback(s, s->time.next, s->userdata);
3115 break;
3116
3117 case SOURCE_SIGNAL:
3118 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3119 break;
3120
3121 case SOURCE_CHILD: {
3122 bool zombie;
3123
3124 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3125
3126 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3127
3128 /* Now, reap the PID for good. */
3129 if (zombie)
3130 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3131
3132 break;
3133 }
3134
3135 case SOURCE_DEFER:
3136 r = s->defer.callback(s, s->userdata);
3137 break;
3138
3139 case SOURCE_POST:
3140 r = s->post.callback(s, s->userdata);
3141 break;
3142
3143 case SOURCE_EXIT:
3144 r = s->exit.callback(s, s->userdata);
3145 break;
3146
3147 case SOURCE_INOTIFY: {
3148 struct sd_event *e = s->event;
3149 struct inotify_data *d;
3150 size_t sz;
3151
3152 assert(s->inotify.inode_data);
3153 assert_se(d = s->inotify.inode_data->inotify_data);
3154
3155 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3156 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3157 assert(d->buffer_filled >= sz);
3158
3159 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3160
3161 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3162 * buffer. */
3163 if (d->n_pending == 0)
3164 event_inotify_data_drop(e, d, sz);
3165
3166 break;
3167 }
3168
3169 case SOURCE_WATCHDOG:
3170 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3171 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3172 assert_not_reached("Wut? I shouldn't exist.");
3173 }
3174
3175 s->dispatching = false;
3176
3177 if (r < 0)
3178 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3179 strna(s->description), event_source_type_to_string(saved_type));
3180
3181 if (s->n_ref == 0)
3182 source_free(s);
3183 else if (r < 0)
3184 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3185
3186 return 1;
3187 }
3188
3189 static int event_prepare(sd_event *e) {
3190 int r;
3191
3192 assert(e);
3193
3194 for (;;) {
3195 sd_event_source *s;
3196
3197 s = prioq_peek(e->prepare);
3198 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3199 break;
3200
3201 s->prepare_iteration = e->iteration;
3202 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3203 if (r < 0)
3204 return r;
3205
3206 assert(s->prepare);
3207
3208 s->dispatching = true;
3209 r = s->prepare(s, s->userdata);
3210 s->dispatching = false;
3211
3212 if (r < 0)
3213 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3214 strna(s->description), event_source_type_to_string(s->type));
3215
3216 if (s->n_ref == 0)
3217 source_free(s);
3218 else if (r < 0)
3219 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3220 }
3221
3222 return 0;
3223 }
3224
3225 static int dispatch_exit(sd_event *e) {
3226 sd_event_source *p;
3227 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3228 int r;
3229
3230 assert(e);
3231
3232 p = prioq_peek(e->exit);
3233 if (!p || p->enabled == SD_EVENT_OFF) {
3234 e->state = SD_EVENT_FINISHED;
3235 return 0;
3236 }
3237
3238 ref = sd_event_ref(e);
3239 e->iteration++;
3240 e->state = SD_EVENT_EXITING;
3241 r = source_dispatch(p);
3242 e->state = SD_EVENT_INITIAL;
3243 return r;
3244 }
3245
3246 static sd_event_source* event_next_pending(sd_event *e) {
3247 sd_event_source *p;
3248
3249 assert(e);
3250
3251 p = prioq_peek(e->pending);
3252 if (!p)
3253 return NULL;
3254
3255 if (p->enabled == SD_EVENT_OFF)
3256 return NULL;
3257
3258 return p;
3259 }
3260
3261 static int arm_watchdog(sd_event *e) {
3262 struct itimerspec its = {};
3263 usec_t t;
3264 int r;
3265
3266 assert(e);
3267 assert(e->watchdog_fd >= 0);
3268
3269 t = sleep_between(e,
3270 e->watchdog_last + (e->watchdog_period / 2),
3271 e->watchdog_last + (e->watchdog_period * 3 / 4));
3272
3273 timespec_store(&its.it_value, t);
3274
3275 /* Make sure we never set the watchdog to 0, which tells the
3276 * kernel to disable it. */
3277 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3278 its.it_value.tv_nsec = 1;
3279
3280 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3281 if (r < 0)
3282 return -errno;
3283
3284 return 0;
3285 }
3286
3287 static int process_watchdog(sd_event *e) {
3288 assert(e);
3289
3290 if (!e->watchdog)
3291 return 0;
3292
3293 /* Don't notify watchdog too often */
3294 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3295 return 0;
3296
3297 sd_notify(false, "WATCHDOG=1");
3298 e->watchdog_last = e->timestamp.monotonic;
3299
3300 return arm_watchdog(e);
3301 }
3302
3303 static void event_close_inode_data_fds(sd_event *e) {
3304 struct inode_data *d;
3305
3306 assert(e);
3307
3308 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3309 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3310 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3311 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3312 * compromise. */
3313
3314 while ((d = e->inode_data_to_close)) {
3315 assert(d->fd >= 0);
3316 d->fd = safe_close(d->fd);
3317
3318 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3319 }
3320 }
3321
3322 _public_ int sd_event_prepare(sd_event *e) {
3323 int r;
3324
3325 assert_return(e, -EINVAL);
3326 assert_return(e = event_resolve(e), -ENOPKG);
3327 assert_return(!event_pid_changed(e), -ECHILD);
3328 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3329 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3330
3331 if (e->exit_requested)
3332 goto pending;
3333
3334 e->iteration++;
3335
3336 e->state = SD_EVENT_PREPARING;
3337 r = event_prepare(e);
3338 e->state = SD_EVENT_INITIAL;
3339 if (r < 0)
3340 return r;
3341
3342 r = event_arm_timer(e, &e->realtime);
3343 if (r < 0)
3344 return r;
3345
3346 r = event_arm_timer(e, &e->boottime);
3347 if (r < 0)
3348 return r;
3349
3350 r = event_arm_timer(e, &e->monotonic);
3351 if (r < 0)
3352 return r;
3353
3354 r = event_arm_timer(e, &e->realtime_alarm);
3355 if (r < 0)
3356 return r;
3357
3358 r = event_arm_timer(e, &e->boottime_alarm);
3359 if (r < 0)
3360 return r;
3361
3362 event_close_inode_data_fds(e);
3363
3364 if (event_next_pending(e) || e->need_process_child)
3365 goto pending;
3366
3367 e->state = SD_EVENT_ARMED;
3368
3369 return 0;
3370
3371 pending:
3372 e->state = SD_EVENT_ARMED;
3373 r = sd_event_wait(e, 0);
3374 if (r == 0)
3375 e->state = SD_EVENT_ARMED;
3376
3377 return r;
3378 }
3379
3380 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3381 struct epoll_event *ev_queue;
3382 unsigned ev_queue_max;
3383 int r, m, i;
3384
3385 assert_return(e, -EINVAL);
3386 assert_return(e = event_resolve(e), -ENOPKG);
3387 assert_return(!event_pid_changed(e), -ECHILD);
3388 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3389 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3390
3391 if (e->exit_requested) {
3392 e->state = SD_EVENT_PENDING;
3393 return 1;
3394 }
3395
3396 ev_queue_max = MAX(e->n_sources, 1u);
3397 ev_queue = newa(struct epoll_event, ev_queue_max);
3398
3399 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3400 if (e->inotify_data_buffered)
3401 timeout = 0;
3402
3403 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3404 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3405 if (m < 0) {
3406 if (errno == EINTR) {
3407 e->state = SD_EVENT_PENDING;
3408 return 1;
3409 }
3410
3411 r = -errno;
3412 goto finish;
3413 }
3414
3415 triple_timestamp_get(&e->timestamp);
3416
3417 for (i = 0; i < m; i++) {
3418
3419 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3420 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3421 else {
3422 WakeupType *t = ev_queue[i].data.ptr;
3423
3424 switch (*t) {
3425
3426 case WAKEUP_EVENT_SOURCE:
3427 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3428 break;
3429
3430 case WAKEUP_CLOCK_DATA: {
3431 struct clock_data *d = ev_queue[i].data.ptr;
3432 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3433 break;
3434 }
3435
3436 case WAKEUP_SIGNAL_DATA:
3437 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3438 break;
3439
3440 case WAKEUP_INOTIFY_DATA:
3441 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3442 break;
3443
3444 default:
3445 assert_not_reached("Invalid wake-up pointer");
3446 }
3447 }
3448 if (r < 0)
3449 goto finish;
3450 }
3451
3452 r = process_watchdog(e);
3453 if (r < 0)
3454 goto finish;
3455
3456 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3457 if (r < 0)
3458 goto finish;
3459
3460 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3461 if (r < 0)
3462 goto finish;
3463
3464 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3465 if (r < 0)
3466 goto finish;
3467
3468 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3469 if (r < 0)
3470 goto finish;
3471
3472 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3473 if (r < 0)
3474 goto finish;
3475
3476 if (e->need_process_child) {
3477 r = process_child(e);
3478 if (r < 0)
3479 goto finish;
3480 }
3481
3482 r = process_inotify(e);
3483 if (r < 0)
3484 goto finish;
3485
3486 if (event_next_pending(e)) {
3487 e->state = SD_EVENT_PENDING;
3488
3489 return 1;
3490 }
3491
3492 r = 0;
3493
3494 finish:
3495 e->state = SD_EVENT_INITIAL;
3496
3497 return r;
3498 }
3499
3500 _public_ int sd_event_dispatch(sd_event *e) {
3501 sd_event_source *p;
3502 int r;
3503
3504 assert_return(e, -EINVAL);
3505 assert_return(e = event_resolve(e), -ENOPKG);
3506 assert_return(!event_pid_changed(e), -ECHILD);
3507 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3508 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3509
3510 if (e->exit_requested)
3511 return dispatch_exit(e);
3512
3513 p = event_next_pending(e);
3514 if (p) {
3515 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3516
3517 ref = sd_event_ref(e);
3518 e->state = SD_EVENT_RUNNING;
3519 r = source_dispatch(p);
3520 e->state = SD_EVENT_INITIAL;
3521 return r;
3522 }
3523
3524 e->state = SD_EVENT_INITIAL;
3525
3526 return 1;
3527 }
3528
3529 static void event_log_delays(sd_event *e) {
3530 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3531 unsigned i;
3532 int o;
3533
3534 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3535 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3536 e->delays[i] = 0;
3537 }
3538 log_debug("Event loop iterations: %.*s", o, b);
3539 }
3540
3541 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3542 int r;
3543
3544 assert_return(e, -EINVAL);
3545 assert_return(e = event_resolve(e), -ENOPKG);
3546 assert_return(!event_pid_changed(e), -ECHILD);
3547 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3548 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3549
3550 if (e->profile_delays && e->last_run) {
3551 usec_t this_run;
3552 unsigned l;
3553
3554 this_run = now(CLOCK_MONOTONIC);
3555
3556 l = u64log2(this_run - e->last_run);
3557 assert(l < sizeof(e->delays));
3558 e->delays[l]++;
3559
3560 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3561 event_log_delays(e);
3562 e->last_log = this_run;
3563 }
3564 }
3565
3566 r = sd_event_prepare(e);
3567 if (r == 0)
3568 /* There was nothing? Then wait... */
3569 r = sd_event_wait(e, timeout);
3570
3571 if (e->profile_delays)
3572 e->last_run = now(CLOCK_MONOTONIC);
3573
3574 if (r > 0) {
3575 /* There's something now, then let's dispatch it */
3576 r = sd_event_dispatch(e);
3577 if (r < 0)
3578 return r;
3579
3580 return 1;
3581 }
3582
3583 return r;
3584 }
3585
3586 _public_ int sd_event_loop(sd_event *e) {
3587 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3588 int r;
3589
3590 assert_return(e, -EINVAL);
3591 assert_return(e = event_resolve(e), -ENOPKG);
3592 assert_return(!event_pid_changed(e), -ECHILD);
3593 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3594
3595 ref = sd_event_ref(e);
3596
3597 while (e->state != SD_EVENT_FINISHED) {
3598 r = sd_event_run(e, (uint64_t) -1);
3599 if (r < 0)
3600 return r;
3601 }
3602
3603 return e->exit_code;
3604 }
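/* Illustrative sketch, not part of the original file: a minimal program structure around
 * sd_event_loop(). Error handling is abbreviated and the setup helpers from the sketches
 * above are assumed; in real code each return value should be checked. */
static int run_minimal_loop(void) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* Add sources here, e.g. setup_sigterm(e) from the sketch above */

        /* Runs prepare/wait/dispatch until sd_event_exit() is called, then returns the exit code */
        return sd_event_loop(e);
}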
3605
3606 _public_ int sd_event_get_fd(sd_event *e) {
3607
3608 assert_return(e, -EINVAL);
3609 assert_return(e = event_resolve(e), -ENOPKG);
3610 assert_return(!event_pid_changed(e), -ECHILD);
3611
3612 return e->epoll_fd;
3613 }
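/* Illustrative sketch, not part of the original file: driving sd-event from a foreign
 * poll loop via sd_event_get_fd() and the prepare/wait/dispatch split that sd_event_run()
 * uses internally. The poll() call (assuming <poll.h>) stands in for whatever mechanism
 * the embedding loop uses; the helper name is hypothetical. */
static int iterate_embedded(sd_event *e) {
        int r;

        r = sd_event_prepare(e);        /* > 0 means something is already pending */
        if (r < 0)
                return r;

        if (r == 0) {
                struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };

                /* Block in the foreign loop until the epoll fd becomes readable */
                if (poll(&p, 1, -1) < 0)
                        return -errno;

                r = sd_event_wait(e, 0);        /* collect events without blocking */
                if (r < 0)
                        return r;
        }

        if (r > 0)
                return sd_event_dispatch(e);

        return 0;
}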
3614
3615 _public_ int sd_event_get_state(sd_event *e) {
3616 assert_return(e, -EINVAL);
3617 assert_return(e = event_resolve(e), -ENOPKG);
3618 assert_return(!event_pid_changed(e), -ECHILD);
3619
3620 return e->state;
3621 }
3622
3623 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3624 assert_return(e, -EINVAL);
3625 assert_return(e = event_resolve(e), -ENOPKG);
3626 assert_return(code, -EINVAL);
3627 assert_return(!event_pid_changed(e), -ECHILD);
3628
3629 if (!e->exit_requested)
3630 return -ENODATA;
3631
3632 *code = e->exit_code;
3633 return 0;
3634 }
3635
3636 _public_ int sd_event_exit(sd_event *e, int code) {
3637 assert_return(e, -EINVAL);
3638 assert_return(e = event_resolve(e), -ENOPKG);
3639 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3640 assert_return(!event_pid_changed(e), -ECHILD);
3641
3642 e->exit_requested = true;
3643 e->exit_code = code;
3644
3645 return 0;
3646 }
3647
3648 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3649 assert_return(e, -EINVAL);
3650 assert_return(e = event_resolve(e), -ENOPKG);
3651 assert_return(usec, -EINVAL);
3652 assert_return(!event_pid_changed(e), -ECHILD);
3653
3654 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3655 return -EOPNOTSUPP;
3656
3657 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3658 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3659 * the purpose of getting the time this doesn't matter. */
3660 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3661 return -EOPNOTSUPP;
3662
3663 if (!triple_timestamp_is_set(&e->timestamp)) {
3664 /* Implicitly fall back to now() if we never ran
3665 * before and thus have no cached time. */
3666 *usec = now(clock);
3667 return 1;
3668 }
3669
3670 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3671 return 0;
3672 }
3673
3674 _public_ int sd_event_default(sd_event **ret) {
3675 sd_event *e = NULL;
3676 int r;
3677
3678 if (!ret)
3679 return !!default_event;
3680
3681 if (default_event) {
3682 *ret = sd_event_ref(default_event);
3683 return 0;
3684 }
3685
3686 r = sd_event_new(&e);
3687 if (r < 0)
3688 return r;
3689
3690 e->default_event_ptr = &default_event;
3691 e->tid = gettid();
3692 default_event = e;
3693
3694 *ret = e;
3695 return 1;
3696 }
3697
3698 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3699 assert_return(e, -EINVAL);
3700 assert_return(e = event_resolve(e), -ENOPKG);
3701 assert_return(tid, -EINVAL);
3702 assert_return(!event_pid_changed(e), -ECHILD);
3703
3704 if (e->tid != 0) {
3705 *tid = e->tid;
3706 return 0;
3707 }
3708
3709 return -ENXIO;
3710 }
3711
3712 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3713 int r;
3714
3715 assert_return(e, -EINVAL);
3716 assert_return(e = event_resolve(e), -ENOPKG);
3717 assert_return(!event_pid_changed(e), -ECHILD);
3718
3719 if (e->watchdog == !!b)
3720 return e->watchdog;
3721
3722 if (b) {
3723 struct epoll_event ev;
3724
3725 r = sd_watchdog_enabled(false, &e->watchdog_period);
3726 if (r <= 0)
3727 return r;
3728
3729 /* Issue first ping immediately */
3730 sd_notify(false, "WATCHDOG=1");
3731 e->watchdog_last = now(CLOCK_MONOTONIC);
3732
3733 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3734 if (e->watchdog_fd < 0)
3735 return -errno;
3736
3737 r = arm_watchdog(e);
3738 if (r < 0)
3739 goto fail;
3740
3741 ev = (struct epoll_event) {
3742 .events = EPOLLIN,
3743 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3744 };
3745
3746 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3747 if (r < 0) {
3748 r = -errno;
3749 goto fail;
3750 }
3751
3752 } else {
3753 if (e->watchdog_fd >= 0) {
3754 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3755 e->watchdog_fd = safe_close(e->watchdog_fd);
3756 }
3757 }
3758
3759 e->watchdog = !!b;
3760 return e->watchdog;
3761
3762 fail:
3763 e->watchdog_fd = safe_close(e->watchdog_fd);
3764 return r;
3765 }
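/* Illustrative sketch, not part of the original file: letting the loop handle the service
 * watchdog. With WatchdogSec= set in the unit file, sd_watchdog_enabled() (called above)
 * reports the period via $WATCHDOG_USEC, and the loop then pings the service manager from
 * process_watchdog(), re-arming the timer to fire between half and three quarters of the
 * period after the last ping. The helper name is hypothetical. */
static int enable_loop_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;       /* watchdog could not be set up */
        if (r == 0)
                return 0;       /* watchdog not requested by the service manager */

        return 1;               /* watchdog armed */
}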
3766
3767 _public_ int sd_event_get_watchdog(sd_event *e) {
3768 assert_return(e, -EINVAL);
3769 assert_return(e = event_resolve(e), -ENOPKG);
3770 assert_return(!event_pid_changed(e), -ECHILD);
3771
3772 return e->watchdog;
3773 }
3774
3775 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3776 assert_return(e, -EINVAL);
3777 assert_return(e = event_resolve(e), -ENOPKG);
3778 assert_return(!event_pid_changed(e), -ECHILD);
3779
3780 *ret = e->iteration;
3781 return 0;
3782 }
3783
3784 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3785 assert_return(s, -EINVAL);
3786
3787 s->destroy_callback = callback;
3788 return 0;
3789 }
3790
3791 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3792 assert_return(s, -EINVAL);
3793
3794 if (ret)
3795 *ret = s->destroy_callback;
3796
3797 return !!s->destroy_callback;
3798 }
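/* Illustrative sketch, not part of the original file: attaching a destroy callback so that
 * heap-allocated userdata is freed automatically when the source is released, whether that
 * happens through an explicit unref or when the loop itself goes away. The structure and
 * helper names are hypothetical. */
struct my_ctx {
        char *name;
};

static void my_ctx_destroy(void *userdata) {
        struct my_ctx *c = userdata;

        if (!c)
                return;

        free(c->name);
        free(c);
}

static int add_defer_with_ctx(sd_event *e, sd_event_source **ret, sd_event_handler_t cb, struct my_ctx *c) {
        _cleanup_(sd_event_source_unrefp) sd_event_source *s = NULL;
        int r;

        r = sd_event_add_defer(e, &s, cb, c);
        if (r < 0)
                return r;

        /* From now on the source owns "c": it is destroyed together with the source */
        r = sd_event_source_set_destroy_callback(s, my_ctx_destroy);
        if (r < 0)
                return r;

        *ret = TAKE_PTR(s);     /* hand the reference to the caller */
        return 0;
}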