src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "fd-util.h"
13 #include "fs-util.h"
14 #include "hashmap.h"
15 #include "list.h"
16 #include "macro.h"
17 #include "missing.h"
18 #include "prioq.h"
19 #include "process-util.h"
20 #include "set.h"
21 #include "signal-util.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "time-util.h"
25 #include "util.h"
26
27 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
28
29 typedef enum EventSourceType {
30 SOURCE_IO,
31 SOURCE_TIME_REALTIME,
32 SOURCE_TIME_BOOTTIME,
33 SOURCE_TIME_MONOTONIC,
34 SOURCE_TIME_REALTIME_ALARM,
35 SOURCE_TIME_BOOTTIME_ALARM,
36 SOURCE_SIGNAL,
37 SOURCE_CHILD,
38 SOURCE_DEFER,
39 SOURCE_POST,
40 SOURCE_EXIT,
41 SOURCE_WATCHDOG,
42 SOURCE_INOTIFY,
43 _SOURCE_EVENT_SOURCE_TYPE_MAX,
44 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
45 } EventSourceType;
46
47 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
48 [SOURCE_IO] = "io",
49 [SOURCE_TIME_REALTIME] = "realtime",
50 [SOURCE_TIME_BOOTTIME] = "boottime",
51 [SOURCE_TIME_MONOTONIC] = "monotonic",
52 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
53 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
54 [SOURCE_SIGNAL] = "signal",
55 [SOURCE_CHILD] = "child",
56 [SOURCE_DEFER] = "defer",
57 [SOURCE_POST] = "post",
58 [SOURCE_EXIT] = "exit",
59 [SOURCE_WATCHDOG] = "watchdog",
60 [SOURCE_INOTIFY] = "inotify",
61 };
62
63 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
64
65 /* All objects we use in epoll events start with this WakeupType field, so that
66 * we know how to dispatch them */
67 typedef enum WakeupType {
68 WAKEUP_NONE,
69 WAKEUP_EVENT_SOURCE,
70 WAKEUP_CLOCK_DATA,
71 WAKEUP_SIGNAL_DATA,
72 WAKEUP_INOTIFY_DATA,
73 _WAKEUP_TYPE_MAX,
74 _WAKEUP_TYPE_INVALID = -1,
75 } WakeupType;
76
77 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
78
79 struct inode_data;
80
81 struct sd_event_source {
82 WakeupType wakeup;
83
84 unsigned n_ref;
85
86 sd_event *event;
87 void *userdata;
88 sd_event_handler_t prepare;
89
90 char *description;
91
92 EventSourceType type:5;
93 signed int enabled:3;
94 bool pending:1;
95 bool dispatching:1;
96 bool floating:1;
97
98 int64_t priority;
99 unsigned pending_index;
100 unsigned prepare_index;
101 uint64_t pending_iteration;
102 uint64_t prepare_iteration;
103
104 sd_event_destroy_t destroy_callback;
105
106 LIST_FIELDS(sd_event_source, sources);
107
108 union {
109 struct {
110 sd_event_io_handler_t callback;
111 int fd;
112 uint32_t events;
113 uint32_t revents;
114 bool registered:1;
115 bool owned:1;
116 } io;
117 struct {
118 sd_event_time_handler_t callback;
119 usec_t next, accuracy;
120 unsigned earliest_index;
121 unsigned latest_index;
122 } time;
123 struct {
124 sd_event_signal_handler_t callback;
125 struct signalfd_siginfo siginfo;
126 int sig;
127 } signal;
128 struct {
129 sd_event_child_handler_t callback;
130 siginfo_t siginfo;
131 pid_t pid;
132 int options;
133 } child;
134 struct {
135 sd_event_handler_t callback;
136 } defer;
137 struct {
138 sd_event_handler_t callback;
139 } post;
140 struct {
141 sd_event_handler_t callback;
142 unsigned prioq_index;
143 } exit;
144 struct {
145 sd_event_inotify_handler_t callback;
146 uint32_t mask;
147 struct inode_data *inode_data;
148 LIST_FIELDS(sd_event_source, by_inode_data);
149 } inotify;
150 };
151 };
152
153 struct clock_data {
154 WakeupType wakeup;
155 int fd;
156
157 /* For all clocks we maintain two priority queues each, one
158 * ordered by the earliest times the events may be
159 * dispatched, and one ordered by the latest times they must
160 * have been dispatched. The range between the top entries in
161 * the two prioqs is the time window we can freely schedule
162 * wakeups in */
163
164 Prioq *earliest;
165 Prioq *latest;
166 usec_t next;
167
168 bool needs_rearm:1;
169 };
170
171 struct signal_data {
172 WakeupType wakeup;
173
174 /* For each priority we maintain one signal fd, so that we
175 * only have to dequeue a single event per priority at a
176 * time. */
177
178 int fd;
179 int64_t priority;
180 sigset_t sigset;
181 sd_event_source *current;
182 };
183
184 /* A structure listing all event sources currently watching a specific inode */
185 struct inode_data {
186 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
187 ino_t ino;
188 dev_t dev;
189
190 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that the
191 * priority of the watch can still be changed until then: changing the priority means adding a watch
192 * descriptor to the inotify fd belonging to the new priority, which is only possible while we still have
193 * a handle to the original inode. We keep a list of all inode_data objects with an open fd in the
194 * to_close list (see below) of the sd-event object, so that it is efficient to close everything before
195 * entering the next event loop iteration. */
196 int fd;
197
198 /* The inotify "watch descriptor" */
199 int wd;
200
201 /* The combined mask of all inotify watches on this inode we manage. This is also the mask that has
202 * most recently been set on the watch descriptor. */
203 uint32_t combined_mask;
204
205 /* All event sources subscribed to this inode */
206 LIST_HEAD(sd_event_source, event_sources);
207
208 /* The inotify object we watch this inode with */
209 struct inotify_data *inotify_data;
210
211 /* A linked list of all inode data objects with fds to close (see above) */
212 LIST_FIELDS(struct inode_data, to_close);
213 };
214
215 /* A structure encapsulating an inotify fd */
216 struct inotify_data {
217 WakeupType wakeup;
218
219 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
220 * a time */
221
222 int fd;
223 int64_t priority;
224
225 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
226 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
227
228 /* The buffer we read inotify events into */
229 union inotify_event_buffer buffer;
230 size_t buffer_filled; /* fill level of the buffer */
231
232 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
233 * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
234 * the events locally if they can't be coalesced). */
235 unsigned n_pending;
236
237 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
238 * to make it efficient to figure out what inotify objects to process data on next. */
239 LIST_FIELDS(struct inotify_data, buffered);
240 };
241
242 struct sd_event {
243 unsigned n_ref;
244
245 int epoll_fd;
246 int watchdog_fd;
247
248 Prioq *pending;
249 Prioq *prepare;
250
251 /* timerfd_create() only supports these five clocks so far. We
252 * can add support for more clocks when the kernel learns to
253 * deal with them, too. */
254 struct clock_data realtime;
255 struct clock_data boottime;
256 struct clock_data monotonic;
257 struct clock_data realtime_alarm;
258 struct clock_data boottime_alarm;
259
260 usec_t perturb;
261
262 sd_event_source **signal_sources; /* indexed by signal number */
263 Hashmap *signal_data; /* indexed by priority */
264
265 Hashmap *child_sources;
266 unsigned n_enabled_child_sources;
267
268 Set *post_sources;
269
270 Prioq *exit;
271
272 Hashmap *inotify_data; /* indexed by priority */
273
274 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
275 LIST_HEAD(struct inode_data, inode_data_to_close);
276
277 /* A list of inotify objects that already have events buffered which aren't processed yet */
278 LIST_HEAD(struct inotify_data, inotify_data_buffered);
279
280 pid_t original_pid;
281
282 uint64_t iteration;
283 triple_timestamp timestamp;
284 int state;
285
286 bool exit_requested:1;
287 bool need_process_child:1;
288 bool watchdog:1;
289 bool profile_delays:1;
290
291 int exit_code;
292
293 pid_t tid;
294 sd_event **default_event_ptr;
295
296 usec_t watchdog_last, watchdog_period;
297
298 unsigned n_sources;
299
300 LIST_HEAD(sd_event_source, sources);
301
302 usec_t last_run, last_log;
303 unsigned delays[sizeof(usec_t) * 8];
304 };
305
306 static thread_local sd_event *default_event = NULL;
307
308 static void source_disconnect(sd_event_source *s);
309 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
310
311 static sd_event *event_resolve(sd_event *e) {
312 return e == SD_EVENT_DEFAULT ? default_event : e;
313 }
314
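/* Comparator for the "pending" prioq: enabled sources sort before disabled ones, then by ascending
 * priority value, and for equal priorities the source that became pending in an earlier loop iteration
 * is dispatched first. */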
315 static int pending_prioq_compare(const void *a, const void *b) {
316 const sd_event_source *x = a, *y = b;
317
318 assert(x->pending);
319 assert(y->pending);
320
321 /* Enabled ones first */
322 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
323 return -1;
324 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
325 return 1;
326
327 /* Lower priority values first */
328 if (x->priority < y->priority)
329 return -1;
330 if (x->priority > y->priority)
331 return 1;
332
333 /* Older entries first */
334 if (x->pending_iteration < y->pending_iteration)
335 return -1;
336 if (x->pending_iteration > y->pending_iteration)
337 return 1;
338
339 return 0;
340 }
341
342 static int prepare_prioq_compare(const void *a, const void *b) {
343 const sd_event_source *x = a, *y = b;
344
345 assert(x->prepare);
346 assert(y->prepare);
347
348 /* Enabled ones first */
349 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
350 return -1;
351 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
352 return 1;
353
354 /* Move most recently prepared ones last, so that we can stop
355 * preparing as soon as we hit one that has already been
356 * prepared in the current iteration */
357 if (x->prepare_iteration < y->prepare_iteration)
358 return -1;
359 if (x->prepare_iteration > y->prepare_iteration)
360 return 1;
361
362 /* Lower priority values first */
363 if (x->priority < y->priority)
364 return -1;
365 if (x->priority > y->priority)
366 return 1;
367
368 return 0;
369 }
370
371 static int earliest_time_prioq_compare(const void *a, const void *b) {
372 const sd_event_source *x = a, *y = b;
373
374 assert(EVENT_SOURCE_IS_TIME(x->type));
375 assert(x->type == y->type);
376
377 /* Enabled ones first */
378 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
379 return -1;
380 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
381 return 1;
382
383 /* Move the pending ones to the end */
384 if (!x->pending && y->pending)
385 return -1;
386 if (x->pending && !y->pending)
387 return 1;
388
389 /* Order by time */
390 if (x->time.next < y->time.next)
391 return -1;
392 if (x->time.next > y->time.next)
393 return 1;
394
395 return 0;
396 }
397
398 static usec_t time_event_source_latest(const sd_event_source *s) {
399 return usec_add(s->time.next, s->time.accuracy);
400 }
401
402 static int latest_time_prioq_compare(const void *a, const void *b) {
403 const sd_event_source *x = a, *y = b;
404
405 assert(EVENT_SOURCE_IS_TIME(x->type));
406 assert(x->type == y->type);
407
408 /* Enabled ones first */
409 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
410 return -1;
411 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
412 return 1;
413
414 /* Move the pending ones to the end */
415 if (!x->pending && y->pending)
416 return -1;
417 if (x->pending && !y->pending)
418 return 1;
419
420 /* Order by time */
421 if (time_event_source_latest(x) < time_event_source_latest(y))
422 return -1;
423 if (time_event_source_latest(x) > time_event_source_latest(y))
424 return 1;
425
426 return 0;
427 }
428
429 static int exit_prioq_compare(const void *a, const void *b) {
430 const sd_event_source *x = a, *y = b;
431
432 assert(x->type == SOURCE_EXIT);
433 assert(y->type == SOURCE_EXIT);
434
435 /* Enabled ones first */
436 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
437 return -1;
438 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
439 return 1;
440
441 /* Lower priority values first */
442 if (x->priority < y->priority)
443 return -1;
444 if (x->priority > y->priority)
445 return 1;
446
447 return 0;
448 }
449
450 static void free_clock_data(struct clock_data *d) {
451 assert(d);
452 assert(d->wakeup == WAKEUP_CLOCK_DATA);
453
454 safe_close(d->fd);
455 prioq_free(d->earliest);
456 prioq_free(d->latest);
457 }
458
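/* Frees the event loop object: disconnects and unrefs the remaining (necessarily floating) event sources,
 * closes the epoll and watchdog fds and releases all per-clock, signal, child, post and inotify state. */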
459 static sd_event *event_free(sd_event *e) {
460 sd_event_source *s;
461
462 assert(e);
463
464 while ((s = e->sources)) {
465 assert(s->floating);
466 source_disconnect(s);
467 sd_event_source_unref(s);
468 }
469
470 assert(e->n_sources == 0);
471
472 if (e->default_event_ptr)
473 *(e->default_event_ptr) = NULL;
474
475 safe_close(e->epoll_fd);
476 safe_close(e->watchdog_fd);
477
478 free_clock_data(&e->realtime);
479 free_clock_data(&e->boottime);
480 free_clock_data(&e->monotonic);
481 free_clock_data(&e->realtime_alarm);
482 free_clock_data(&e->boottime_alarm);
483
484 prioq_free(e->pending);
485 prioq_free(e->prepare);
486 prioq_free(e->exit);
487
488 free(e->signal_sources);
489 hashmap_free(e->signal_data);
490
491 hashmap_free(e->inotify_data);
492
493 hashmap_free(e->child_sources);
494 set_free(e->post_sources);
495
496 return mfree(e);
497 }
498
499 _public_ int sd_event_new(sd_event** ret) {
500 sd_event *e;
501 int r;
502
503 assert_return(ret, -EINVAL);
504
505 e = new(sd_event, 1);
506 if (!e)
507 return -ENOMEM;
508
509 *e = (sd_event) {
510 .n_ref = 1,
511 .epoll_fd = -1,
512 .watchdog_fd = -1,
513 .realtime.wakeup = WAKEUP_CLOCK_DATA,
514 .realtime.fd = -1,
515 .realtime.next = USEC_INFINITY,
516 .boottime.wakeup = WAKEUP_CLOCK_DATA,
517 .boottime.fd = -1,
518 .boottime.next = USEC_INFINITY,
519 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
520 .monotonic.fd = -1,
521 .monotonic.next = USEC_INFINITY,
522 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
523 .realtime_alarm.fd = -1,
524 .realtime_alarm.next = USEC_INFINITY,
525 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
526 .boottime_alarm.fd = -1,
527 .boottime_alarm.next = USEC_INFINITY,
528 .perturb = USEC_INFINITY,
529 .original_pid = getpid_cached(),
530 };
531
532 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
533 if (r < 0)
534 goto fail;
535
536 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
537 if (e->epoll_fd < 0) {
538 r = -errno;
539 goto fail;
540 }
541
542 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
543
544 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
545 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
546 e->profile_delays = true;
547 }
548
549 *ret = e;
550 return 0;
551
552 fail:
553 event_free(e);
554 return r;
555 }
556
557 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
558
559 static bool event_pid_changed(sd_event *e) {
560 assert(e);
561
562 /* We don't support people creating an event loop and keeping
563 * it around over a fork(). Let's complain. */
564
565 return e->original_pid != getpid_cached();
566 }
567
568 static void source_io_unregister(sd_event_source *s) {
569 int r;
570
571 assert(s);
572 assert(s->type == SOURCE_IO);
573
574 if (event_pid_changed(s->event))
575 return;
576
577 if (!s->io.registered)
578 return;
579
580 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
581 if (r < 0)
582 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
583 strna(s->description), event_source_type_to_string(s->type));
584
585 s->io.registered = false;
586 }
587
588 static int source_io_register(
589 sd_event_source *s,
590 int enabled,
591 uint32_t events) {
592
593 struct epoll_event ev;
594 int r;
595
596 assert(s);
597 assert(s->type == SOURCE_IO);
598 assert(enabled != SD_EVENT_OFF);
599
600 ev = (struct epoll_event) {
601 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
602 .data.ptr = s,
603 };
604
605 if (s->io.registered)
606 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
607 else
608 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
609 if (r < 0)
610 return -errno;
611
612 s->io.registered = true;
613
614 return 0;
615 }
616
617 static clockid_t event_source_type_to_clock(EventSourceType t) {
618
619 switch (t) {
620
621 case SOURCE_TIME_REALTIME:
622 return CLOCK_REALTIME;
623
624 case SOURCE_TIME_BOOTTIME:
625 return CLOCK_BOOTTIME;
626
627 case SOURCE_TIME_MONOTONIC:
628 return CLOCK_MONOTONIC;
629
630 case SOURCE_TIME_REALTIME_ALARM:
631 return CLOCK_REALTIME_ALARM;
632
633 case SOURCE_TIME_BOOTTIME_ALARM:
634 return CLOCK_BOOTTIME_ALARM;
635
636 default:
637 return (clockid_t) -1;
638 }
639 }
640
641 static EventSourceType clock_to_event_source_type(clockid_t clock) {
642
643 switch (clock) {
644
645 case CLOCK_REALTIME:
646 return SOURCE_TIME_REALTIME;
647
648 case CLOCK_BOOTTIME:
649 return SOURCE_TIME_BOOTTIME;
650
651 case CLOCK_MONOTONIC:
652 return SOURCE_TIME_MONOTONIC;
653
654 case CLOCK_REALTIME_ALARM:
655 return SOURCE_TIME_REALTIME_ALARM;
656
657 case CLOCK_BOOTTIME_ALARM:
658 return SOURCE_TIME_BOOTTIME_ALARM;
659
660 default:
661 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
662 }
663 }
664
665 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
666 assert(e);
667
668 switch (t) {
669
670 case SOURCE_TIME_REALTIME:
671 return &e->realtime;
672
673 case SOURCE_TIME_BOOTTIME:
674 return &e->boottime;
675
676 case SOURCE_TIME_MONOTONIC:
677 return &e->monotonic;
678
679 case SOURCE_TIME_REALTIME_ALARM:
680 return &e->realtime_alarm;
681
682 case SOURCE_TIME_BOOTTIME_ALARM:
683 return &e->boottime_alarm;
684
685 default:
686 return NULL;
687 }
688 }
689
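/* Ensures that a signal_data object (i.e. a signalfd) exists for the priority the specified signal is
 * dispatched at, registers it with the epoll if it is new, and adds the signal to its mask. On success
 * the object is returned in *ret, if that is non-NULL. */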
690 static int event_make_signal_data(
691 sd_event *e,
692 int sig,
693 struct signal_data **ret) {
694
695 struct epoll_event ev;
696 struct signal_data *d;
697 bool added = false;
698 sigset_t ss_copy;
699 int64_t priority;
700 int r;
701
702 assert(e);
703
704 if (event_pid_changed(e))
705 return -ECHILD;
706
707 if (e->signal_sources && e->signal_sources[sig])
708 priority = e->signal_sources[sig]->priority;
709 else
710 priority = SD_EVENT_PRIORITY_NORMAL;
711
712 d = hashmap_get(e->signal_data, &priority);
713 if (d) {
714 if (sigismember(&d->sigset, sig) > 0) {
715 if (ret)
716 *ret = d;
717 return 0;
718 }
719 } else {
720 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
721 if (r < 0)
722 return r;
723
724 d = new(struct signal_data, 1);
725 if (!d)
726 return -ENOMEM;
727
728 *d = (struct signal_data) {
729 .wakeup = WAKEUP_SIGNAL_DATA,
730 .fd = -1,
731 .priority = priority,
732 };
733
734 r = hashmap_put(e->signal_data, &d->priority, d);
735 if (r < 0) {
736 free(d);
737 return r;
738 }
739
740 added = true;
741 }
742
743 ss_copy = d->sigset;
744 assert_se(sigaddset(&ss_copy, sig) >= 0);
745
746 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
747 if (r < 0) {
748 r = -errno;
749 goto fail;
750 }
751
752 d->sigset = ss_copy;
753
754 if (d->fd >= 0) {
755 if (ret)
756 *ret = d;
757 return 0;
758 }
759
760 d->fd = fd_move_above_stdio(r);
761
762 ev = (struct epoll_event) {
763 .events = EPOLLIN,
764 .data.ptr = d,
765 };
766
767 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
768 if (r < 0) {
769 r = -errno;
770 goto fail;
771 }
772
773 if (ret)
774 *ret = d;
775
776 return 0;
777
778 fail:
779 if (added) {
780 d->fd = safe_close(d->fd);
781 hashmap_remove(e->signal_data, &d->priority);
782 free(d);
783 }
784
785 return r;
786 }
787
788 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
789 assert(e);
790 assert(d);
791
792 /* Turns off the specified signal in the signal data
793 * object. If the signal mask of the object becomes empty
794 * that way, the object is removed altogether. */
795
796 if (sigismember(&d->sigset, sig) == 0)
797 return;
798
799 assert_se(sigdelset(&d->sigset, sig) >= 0);
800
801 if (sigisemptyset(&d->sigset)) {
802
803 /* If the mask has become empty we can get rid of the structure */
804 hashmap_remove(e->signal_data, &d->priority);
805 safe_close(d->fd);
806 free(d);
807 return;
808 }
809
810 assert(d->fd >= 0);
811
812 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
813 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
814 }
815
816 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
817 struct signal_data *d;
818 static const int64_t zero_priority = 0;
819
820 assert(e);
821
822 /* Rechecks if the specified signal is still something we are
823 * interested in. If not, we'll unmask it, and possibly drop
824 * the signalfd for it. */
825
826 if (sig == SIGCHLD &&
827 e->n_enabled_child_sources > 0)
828 return;
829
830 if (e->signal_sources &&
831 e->signal_sources[sig] &&
832 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
833 return;
834
835 /*
836 * The specified signal might be enabled in three different queues:
837 *
838 * 1) the one that belongs to the priority passed (if it is non-NULL)
839 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
840 * 3) the 0 priority (to cover the SIGCHLD case)
841 *
842 * Hence, let's remove it from all three here.
843 */
844
845 if (priority) {
846 d = hashmap_get(e->signal_data, priority);
847 if (d)
848 event_unmask_signal_data(e, d, sig);
849 }
850
851 if (e->signal_sources && e->signal_sources[sig]) {
852 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
853 if (d)
854 event_unmask_signal_data(e, d, sig);
855 }
856
857 d = hashmap_get(e->signal_data, &zero_priority);
858 if (d)
859 event_unmask_signal_data(e, d, sig);
860 }
861
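/* Detaches an event source from its event loop: undoes the per-type registration (epoll, prioqs,
 * hashmaps, signalfd masks, inotify inode data) and unlinks it from the loop's source list, without
 * freeing the source object itself. */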
862 static void source_disconnect(sd_event_source *s) {
863 sd_event *event;
864
865 assert(s);
866
867 if (!s->event)
868 return;
869
870 assert(s->event->n_sources > 0);
871
872 switch (s->type) {
873
874 case SOURCE_IO:
875 if (s->io.fd >= 0)
876 source_io_unregister(s);
877
878 break;
879
880 case SOURCE_TIME_REALTIME:
881 case SOURCE_TIME_BOOTTIME:
882 case SOURCE_TIME_MONOTONIC:
883 case SOURCE_TIME_REALTIME_ALARM:
884 case SOURCE_TIME_BOOTTIME_ALARM: {
885 struct clock_data *d;
886
887 d = event_get_clock_data(s->event, s->type);
888 assert(d);
889
890 prioq_remove(d->earliest, s, &s->time.earliest_index);
891 prioq_remove(d->latest, s, &s->time.latest_index);
892 d->needs_rearm = true;
893 break;
894 }
895
896 case SOURCE_SIGNAL:
897 if (s->signal.sig > 0) {
898
899 if (s->event->signal_sources)
900 s->event->signal_sources[s->signal.sig] = NULL;
901
902 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
903 }
904
905 break;
906
907 case SOURCE_CHILD:
908 if (s->child.pid > 0) {
909 if (s->enabled != SD_EVENT_OFF) {
910 assert(s->event->n_enabled_child_sources > 0);
911 s->event->n_enabled_child_sources--;
912 }
913
914 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
915 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
916 }
917
918 break;
919
920 case SOURCE_DEFER:
921 /* nothing */
922 break;
923
924 case SOURCE_POST:
925 set_remove(s->event->post_sources, s);
926 break;
927
928 case SOURCE_EXIT:
929 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
930 break;
931
932 case SOURCE_INOTIFY: {
933 struct inode_data *inode_data;
934
935 inode_data = s->inotify.inode_data;
936 if (inode_data) {
937 struct inotify_data *inotify_data;
938 assert_se(inotify_data = inode_data->inotify_data);
939
940 /* Detach this event source from the inode object */
941 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
942 s->inotify.inode_data = NULL;
943
944 if (s->pending) {
945 assert(inotify_data->n_pending > 0);
946 inotify_data->n_pending--;
947 }
948
949 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode is
950 still being watched. That's because inotify doesn't really have an API for that: we
951 * can only change watch masks with access to the original inode either by fd or by path. But
952 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
953 continuously and keeping the mount busy, which we can't really do. We could reconstruct the
954 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
955 * there), but given the need for open_by_handle_at() which is privileged and not universally
956 * available this would be quite an incomplete solution. Hence we go the other way, leave the
957 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
958 * anymore after reception. Yes, this sucks, but … Linux … */
959
960 /* Maybe release the inode data (and its inotify) */
961 event_gc_inode_data(s->event, inode_data);
962 }
963
964 break;
965 }
966
967 default:
968 assert_not_reached("Wut? I shouldn't exist.");
969 }
970
971 if (s->pending)
972 prioq_remove(s->event->pending, s, &s->pending_index);
973
974 if (s->prepare)
975 prioq_remove(s->event->prepare, s, &s->prepare_index);
976
977 event = s->event;
978
979 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
980 s->event = NULL;
981 LIST_REMOVE(sources, event->sources, s);
982 event->n_sources--;
983
984 if (!s->floating)
985 sd_event_unref(event);
986 }
987
988 static void source_free(sd_event_source *s) {
989 assert(s);
990
991 source_disconnect(s);
992
993 if (s->type == SOURCE_IO && s->io.owned)
994 s->io.fd = safe_close(s->io.fd);
995
996 if (s->destroy_callback)
997 s->destroy_callback(s->userdata);
998
999 free(s->description);
1000 free(s);
1001 }
1002
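/* Marks an event source as pending for dispatching (or clears that state again), keeping the pending
 * prioq as well as the per-clock, signal and inotify bookkeeping that depends on the pending state in
 * sync. */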
1003 static int source_set_pending(sd_event_source *s, bool b) {
1004 int r;
1005
1006 assert(s);
1007 assert(s->type != SOURCE_EXIT);
1008
1009 if (s->pending == b)
1010 return 0;
1011
1012 s->pending = b;
1013
1014 if (b) {
1015 s->pending_iteration = s->event->iteration;
1016
1017 r = prioq_put(s->event->pending, s, &s->pending_index);
1018 if (r < 0) {
1019 s->pending = false;
1020 return r;
1021 }
1022 } else
1023 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1024
1025 if (EVENT_SOURCE_IS_TIME(s->type)) {
1026 struct clock_data *d;
1027
1028 d = event_get_clock_data(s->event, s->type);
1029 assert(d);
1030
1031 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1032 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1033 d->needs_rearm = true;
1034 }
1035
1036 if (s->type == SOURCE_SIGNAL && !b) {
1037 struct signal_data *d;
1038
1039 d = hashmap_get(s->event->signal_data, &s->priority);
1040 if (d && d->current == s)
1041 d->current = NULL;
1042 }
1043
1044 if (s->type == SOURCE_INOTIFY) {
1045
1046 assert(s->inotify.inode_data);
1047 assert(s->inotify.inode_data->inotify_data);
1048
1049 if (b)
1050 s->inotify.inode_data->inotify_data->n_pending ++;
1051 else {
1052 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1053 s->inotify.inode_data->inotify_data->n_pending --;
1054 }
1055 }
1056
1057 return 0;
1058 }
1059
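/* Allocates a new event source of the specified type and links it into the event loop. Unless the source
 * is "floating" (i.e. owned by the event loop itself rather than by the caller) it takes a reference on
 * the loop. */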
1060 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1061 sd_event_source *s;
1062
1063 assert(e);
1064
1065 s = new(sd_event_source, 1);
1066 if (!s)
1067 return NULL;
1068
1069 *s = (struct sd_event_source) {
1070 .n_ref = 1,
1071 .event = e,
1072 .floating = floating,
1073 .type = type,
1074 .pending_index = PRIOQ_IDX_NULL,
1075 .prepare_index = PRIOQ_IDX_NULL,
1076 };
1077
1078 if (!floating)
1079 sd_event_ref(e);
1080
1081 LIST_PREPEND(sources, e->sources, s);
1082 e->n_sources++;
1083
1084 return s;
1085 }
1086
1087 _public_ int sd_event_add_io(
1088 sd_event *e,
1089 sd_event_source **ret,
1090 int fd,
1091 uint32_t events,
1092 sd_event_io_handler_t callback,
1093 void *userdata) {
1094
1095 sd_event_source *s;
1096 int r;
1097
1098 assert_return(e, -EINVAL);
1099 assert_return(e = event_resolve(e), -ENOPKG);
1100 assert_return(fd >= 0, -EBADF);
1101 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1102 assert_return(callback, -EINVAL);
1103 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1104 assert_return(!event_pid_changed(e), -ECHILD);
1105
1106 s = source_new(e, !ret, SOURCE_IO);
1107 if (!s)
1108 return -ENOMEM;
1109
1110 s->wakeup = WAKEUP_EVENT_SOURCE;
1111 s->io.fd = fd;
1112 s->io.events = events;
1113 s->io.callback = callback;
1114 s->userdata = userdata;
1115 s->enabled = SD_EVENT_ON;
1116
1117 r = source_io_register(s, s->enabled, events);
1118 if (r < 0) {
1119 source_free(s);
1120 return r;
1121 }
1122
1123 if (ret)
1124 *ret = s;
1125
1126 return 0;
1127 }
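/* A minimal usage sketch for the call above, assuming the caller already has an event loop "e" and a
 * readable fd to watch; the handler name, the fd variable and the description string are hypothetical:
 *
 *     static int my_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n;
 *
 *             n = read(fd, buf, sizeof(buf));    // drain the fd: without EPOLLET this is level-triggered
 *             return n < 0 ? -errno : 0;
 *     }
 *
 *     sd_event_source *source = NULL;
 *     int r = sd_event_add_io(e, &source, fd, EPOLLIN, my_io_handler, NULL);
 *     if (r < 0)
 *             return r;
 *     (void) sd_event_source_set_description(source, "my-io");
 */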
1128
1129 static void initialize_perturb(sd_event *e) {
1130 sd_id128_t bootid = {};
1131
1132 /* When we sleep for longer, we try to realign the wakeup to
1133 the same time within each minute/second/250ms, so that
1134 events all across the system can be coalesced into a single
1135 CPU wakeup. However, let's take some system-specific
1136 randomness for this value, so that in a network of systems
1137 with synced clocks timer events are distributed a
1138 bit. Here, we calculate a perturbation usec offset from the
1139 boot ID. */
1140
1141 if (_likely_(e->perturb != USEC_INFINITY))
1142 return;
1143
1144 if (sd_id128_get_boot(&bootid) >= 0)
1145 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1146 }
1147
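/* Lazily creates the timerfd for the specified clock and adds it to the epoll; called when the first
 * timer event source for that clock needs to be armed. */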
1148 static int event_setup_timer_fd(
1149 sd_event *e,
1150 struct clock_data *d,
1151 clockid_t clock) {
1152
1153 struct epoll_event ev;
1154 int r, fd;
1155
1156 assert(e);
1157 assert(d);
1158
1159 if (_likely_(d->fd >= 0))
1160 return 0;
1161
1162 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1163 if (fd < 0)
1164 return -errno;
1165
1166 fd = fd_move_above_stdio(fd);
1167
1168 ev = (struct epoll_event) {
1169 .events = EPOLLIN,
1170 .data.ptr = d,
1171 };
1172
1173 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1174 if (r < 0) {
1175 safe_close(fd);
1176 return -errno;
1177 }
1178
1179 d->fd = fd;
1180 return 0;
1181 }
1182
1183 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1184 assert(s);
1185
1186 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1187 }
1188
1189 _public_ int sd_event_add_time(
1190 sd_event *e,
1191 sd_event_source **ret,
1192 clockid_t clock,
1193 uint64_t usec,
1194 uint64_t accuracy,
1195 sd_event_time_handler_t callback,
1196 void *userdata) {
1197
1198 EventSourceType type;
1199 sd_event_source *s;
1200 struct clock_data *d;
1201 int r;
1202
1203 assert_return(e, -EINVAL);
1204 assert_return(e = event_resolve(e), -ENOPKG);
1205 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1206 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1207 assert_return(!event_pid_changed(e), -ECHILD);
1208
1209 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1210 return -EOPNOTSUPP;
1211
1212 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1213 if (type < 0)
1214 return -EOPNOTSUPP;
1215
1216 if (!callback)
1217 callback = time_exit_callback;
1218
1219 d = event_get_clock_data(e, type);
1220 assert(d);
1221
1222 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1223 if (r < 0)
1224 return r;
1225
1226 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1227 if (r < 0)
1228 return r;
1229
1230 if (d->fd < 0) {
1231 r = event_setup_timer_fd(e, d, clock);
1232 if (r < 0)
1233 return r;
1234 }
1235
1236 s = source_new(e, !ret, type);
1237 if (!s)
1238 return -ENOMEM;
1239
1240 s->time.next = usec;
1241 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1242 s->time.callback = callback;
1243 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1244 s->userdata = userdata;
1245 s->enabled = SD_EVENT_ONESHOT;
1246
1247 d->needs_rearm = true;
1248
1249 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1250 if (r < 0)
1251 goto fail;
1252
1253 r = prioq_put(d->latest, s, &s->time.latest_index);
1254 if (r < 0)
1255 goto fail;
1256
1257 if (ret)
1258 *ret = s;
1259
1260 return 0;
1261
1262 fail:
1263 source_free(s);
1264 return r;
1265 }
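/* A minimal usage sketch for the call above, assuming an existing loop "e": arm a timer that fires about
 * five seconds from now on CLOCK_MONOTONIC, with the default accuracy (an accuracy of 0 is mapped to
 * DEFAULT_ACCURACY_USEC above). Passing NULL for ret makes the source "floating", i.e. owned by the loop.
 * The handler name is hypothetical:
 *
 *     static int my_time_handler(sd_event_source *s, uint64_t usec, void *userdata) {
 *             log_debug("Timer elapsed at %" PRIu64 " us", usec);
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     int r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 5 * USEC_PER_SEC, 0, my_time_handler, NULL);
 *     if (r < 0)
 *             return r;
 */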
1266
1267 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1268 assert(s);
1269
1270 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1271 }
1272
1273 _public_ int sd_event_add_signal(
1274 sd_event *e,
1275 sd_event_source **ret,
1276 int sig,
1277 sd_event_signal_handler_t callback,
1278 void *userdata) {
1279
1280 sd_event_source *s;
1281 struct signal_data *d;
1282 sigset_t ss;
1283 int r;
1284
1285 assert_return(e, -EINVAL);
1286 assert_return(e = event_resolve(e), -ENOPKG);
1287 assert_return(SIGNAL_VALID(sig), -EINVAL);
1288 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1289 assert_return(!event_pid_changed(e), -ECHILD);
1290
1291 if (!callback)
1292 callback = signal_exit_callback;
1293
1294 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1295 if (r != 0)
1296 return -r;
1297
1298 if (!sigismember(&ss, sig))
1299 return -EBUSY;
1300
1301 if (!e->signal_sources) {
1302 e->signal_sources = new0(sd_event_source*, _NSIG);
1303 if (!e->signal_sources)
1304 return -ENOMEM;
1305 } else if (e->signal_sources[sig])
1306 return -EBUSY;
1307
1308 s = source_new(e, !ret, SOURCE_SIGNAL);
1309 if (!s)
1310 return -ENOMEM;
1311
1312 s->signal.sig = sig;
1313 s->signal.callback = callback;
1314 s->userdata = userdata;
1315 s->enabled = SD_EVENT_ON;
1316
1317 e->signal_sources[sig] = s;
1318
1319 r = event_make_signal_data(e, sig, &d);
1320 if (r < 0) {
1321 source_free(s);
1322 return r;
1323 }
1324
1325 /* Use the signal name as description for the event source by default */
1326 (void) sd_event_source_set_description(s, signal_to_string(sig));
1327
1328 if (ret)
1329 *ret = s;
1330
1331 return 0;
1332 }
1333
1334 _public_ int sd_event_add_child(
1335 sd_event *e,
1336 sd_event_source **ret,
1337 pid_t pid,
1338 int options,
1339 sd_event_child_handler_t callback,
1340 void *userdata) {
1341
1342 sd_event_source *s;
1343 int r;
1344
1345 assert_return(e, -EINVAL);
1346 assert_return(e = event_resolve(e), -ENOPKG);
1347 assert_return(pid > 1, -EINVAL);
1348 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1349 assert_return(options != 0, -EINVAL);
1350 assert_return(callback, -EINVAL);
1351 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1352 assert_return(!event_pid_changed(e), -ECHILD);
1353
1354 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1355 if (r < 0)
1356 return r;
1357
1358 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1359 return -EBUSY;
1360
1361 s = source_new(e, !ret, SOURCE_CHILD);
1362 if (!s)
1363 return -ENOMEM;
1364
1365 s->child.pid = pid;
1366 s->child.options = options;
1367 s->child.callback = callback;
1368 s->userdata = userdata;
1369 s->enabled = SD_EVENT_ONESHOT;
1370
1371 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1372 if (r < 0) {
1373 source_free(s);
1374 return r;
1375 }
1376
1377 e->n_enabled_child_sources++;
1378
1379 r = event_make_signal_data(e, SIGCHLD, NULL);
1380 if (r < 0) {
1381 e->n_enabled_child_sources--;
1382 source_free(s);
1383 return r;
1384 }
1385
1386 e->need_process_child = true;
1387
1388 if (ret)
1389 *ret = s;
1390
1391 return 0;
1392 }
1393
1394 _public_ int sd_event_add_defer(
1395 sd_event *e,
1396 sd_event_source **ret,
1397 sd_event_handler_t callback,
1398 void *userdata) {
1399
1400 sd_event_source *s;
1401 int r;
1402
1403 assert_return(e, -EINVAL);
1404 assert_return(e = event_resolve(e), -ENOPKG);
1405 assert_return(callback, -EINVAL);
1406 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1407 assert_return(!event_pid_changed(e), -ECHILD);
1408
1409 s = source_new(e, !ret, SOURCE_DEFER);
1410 if (!s)
1411 return -ENOMEM;
1412
1413 s->defer.callback = callback;
1414 s->userdata = userdata;
1415 s->enabled = SD_EVENT_ONESHOT;
1416
1417 r = source_set_pending(s, true);
1418 if (r < 0) {
1419 source_free(s);
1420 return r;
1421 }
1422
1423 if (ret)
1424 *ret = s;
1425
1426 return 0;
1427 }
1428
1429 _public_ int sd_event_add_post(
1430 sd_event *e,
1431 sd_event_source **ret,
1432 sd_event_handler_t callback,
1433 void *userdata) {
1434
1435 sd_event_source *s;
1436 int r;
1437
1438 assert_return(e, -EINVAL);
1439 assert_return(e = event_resolve(e), -ENOPKG);
1440 assert_return(callback, -EINVAL);
1441 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1442 assert_return(!event_pid_changed(e), -ECHILD);
1443
1444 r = set_ensure_allocated(&e->post_sources, NULL);
1445 if (r < 0)
1446 return r;
1447
1448 s = source_new(e, !ret, SOURCE_POST);
1449 if (!s)
1450 return -ENOMEM;
1451
1452 s->post.callback = callback;
1453 s->userdata = userdata;
1454 s->enabled = SD_EVENT_ON;
1455
1456 r = set_put(e->post_sources, s);
1457 if (r < 0) {
1458 source_free(s);
1459 return r;
1460 }
1461
1462 if (ret)
1463 *ret = s;
1464
1465 return 0;
1466 }
1467
1468 _public_ int sd_event_add_exit(
1469 sd_event *e,
1470 sd_event_source **ret,
1471 sd_event_handler_t callback,
1472 void *userdata) {
1473
1474 sd_event_source *s;
1475 int r;
1476
1477 assert_return(e, -EINVAL);
1478 assert_return(e = event_resolve(e), -ENOPKG);
1479 assert_return(callback, -EINVAL);
1480 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1481 assert_return(!event_pid_changed(e), -ECHILD);
1482
1483 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1484 if (r < 0)
1485 return r;
1486
1487 s = source_new(e, !ret, SOURCE_EXIT);
1488 if (!s)
1489 return -ENOMEM;
1490
1491 s->exit.callback = callback;
1492 s->userdata = userdata;
1493 s->exit.prioq_index = PRIOQ_IDX_NULL;
1494 s->enabled = SD_EVENT_ONESHOT;
1495
1496 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1497 if (r < 0) {
1498 source_free(s);
1499 return r;
1500 }
1501
1502 if (ret)
1503 *ret = s;
1504
1505 return 0;
1506 }
1507
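/* Frees a per-priority inotify object, which must not have any inodes attached anymore: unlinks it from
 * the buffered list if it still carries unprocessed events, drops it from the priority hashmap and
 * detaches its fd from the epoll before closing it. */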
1508 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1509 assert(e);
1510
1511 if (!d)
1512 return;
1513
1514 assert(hashmap_isempty(d->inodes));
1515 assert(hashmap_isempty(d->wd));
1516
1517 if (d->buffer_filled > 0)
1518 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1519
1520 hashmap_free(d->inodes);
1521 hashmap_free(d->wd);
1522
1523 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1524
1525 if (d->fd >= 0) {
1526 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1527 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1528
1529 safe_close(d->fd);
1530 }
1531 free(d);
1532 }
1533
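/* Returns the inotify object for the specified priority, allocating the inotify fd and registering it
 * with the epoll if it doesn't exist yet. Returns > 0 if a new object was created, 0 if an existing one
 * was reused. */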
1534 static int event_make_inotify_data(
1535 sd_event *e,
1536 int64_t priority,
1537 struct inotify_data **ret) {
1538
1539 _cleanup_close_ int fd = -1;
1540 struct inotify_data *d;
1541 struct epoll_event ev;
1542 int r;
1543
1544 assert(e);
1545
1546 d = hashmap_get(e->inotify_data, &priority);
1547 if (d) {
1548 if (ret)
1549 *ret = d;
1550 return 0;
1551 }
1552
1553 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1554 if (fd < 0)
1555 return -errno;
1556
1557 fd = fd_move_above_stdio(fd);
1558
1559 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1560 if (r < 0)
1561 return r;
1562
1563 d = new(struct inotify_data, 1);
1564 if (!d)
1565 return -ENOMEM;
1566
1567 *d = (struct inotify_data) {
1568 .wakeup = WAKEUP_INOTIFY_DATA,
1569 .fd = TAKE_FD(fd),
1570 .priority = priority,
1571 };
1572
1573 r = hashmap_put(e->inotify_data, &d->priority, d);
1574 if (r < 0) {
1575 d->fd = safe_close(d->fd);
1576 free(d);
1577 return r;
1578 }
1579
1580 ev = (struct epoll_event) {
1581 .events = EPOLLIN,
1582 .data.ptr = d,
1583 };
1584
1585 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1586 r = -errno;
1587 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1588 * remove the fd from the epoll first, which we don't want as we couldn't
1589 * add it in the first place. */
1590 event_free_inotify_data(e, d);
1591 return r;
1592 }
1593
1594 if (ret)
1595 *ret = d;
1596
1597 return 1;
1598 }
1599
1600 static int inode_data_compare(const void *a, const void *b) {
1601 const struct inode_data *x = a, *y = b;
1602
1603 assert(x);
1604 assert(y);
1605
1606 if (x->dev < y->dev)
1607 return -1;
1608 if (x->dev > y->dev)
1609 return 1;
1610
1611 if (x->ino < y->ino)
1612 return -1;
1613 if (x->ino > y->ino)
1614 return 1;
1615
1616 return 0;
1617 }
1618
1619 static void inode_data_hash_func(const void *p, struct siphash *state) {
1620 const struct inode_data *d = p;
1621
1622 assert(p);
1623
1624 siphash24_compress(&d->dev, sizeof(d->dev), state);
1625 siphash24_compress(&d->ino, sizeof(d->ino), state);
1626 }
1627
1628 const struct hash_ops inode_data_hash_ops = {
1629 .hash = inode_data_hash_func,
1630 .compare = inode_data_compare
1631 };
1632
1633 static void event_free_inode_data(
1634 sd_event *e,
1635 struct inode_data *d) {
1636
1637 assert(e);
1638
1639 if (!d)
1640 return;
1641
1642 assert(!d->event_sources);
1643
1644 if (d->fd >= 0) {
1645 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1646 safe_close(d->fd);
1647 }
1648
1649 if (d->inotify_data) {
1650
1651 if (d->wd >= 0) {
1652 if (d->inotify_data->fd >= 0) {
1653 /* So here's a problem. At the time this runs the watch descriptor might already be
1654 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1655 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1656 * likely case to happen. */
1657
1658 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1659 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1660 }
1661
1662 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1663 }
1664
1665 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1666 }
1667
1668 free(d);
1669 }
1670
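/* Releases an inode object once no event source watches it anymore, and drops the owning inotify object
 * too if it thereby becomes empty. */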
1671 static void event_gc_inode_data(
1672 sd_event *e,
1673 struct inode_data *d) {
1674
1675 struct inotify_data *inotify_data;
1676
1677 assert(e);
1678
1679 if (!d)
1680 return;
1681
1682 if (d->event_sources)
1683 return;
1684
1685 inotify_data = d->inotify_data;
1686 event_free_inode_data(e, d);
1687
1688 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1689 event_free_inotify_data(e, inotify_data);
1690 }
1691
1692 static int event_make_inode_data(
1693 sd_event *e,
1694 struct inotify_data *inotify_data,
1695 dev_t dev,
1696 ino_t ino,
1697 struct inode_data **ret) {
1698
1699 struct inode_data *d, key;
1700 int r;
1701
1702 assert(e);
1703 assert(inotify_data);
1704
1705 key = (struct inode_data) {
1706 .ino = ino,
1707 .dev = dev,
1708 };
1709
1710 d = hashmap_get(inotify_data->inodes, &key);
1711 if (d) {
1712 if (ret)
1713 *ret = d;
1714
1715 return 0;
1716 }
1717
1718 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1719 if (r < 0)
1720 return r;
1721
1722 d = new(struct inode_data, 1);
1723 if (!d)
1724 return -ENOMEM;
1725
1726 *d = (struct inode_data) {
1727 .dev = dev,
1728 .ino = ino,
1729 .wd = -1,
1730 .fd = -1,
1731 .inotify_data = inotify_data,
1732 };
1733
1734 r = hashmap_put(inotify_data->inodes, d, d);
1735 if (r < 0) {
1736 free(d);
1737 return r;
1738 }
1739
1740 if (ret)
1741 *ret = d;
1742
1743 return 1;
1744 }
1745
1746 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1747 bool excl_unlink = true;
1748 uint32_t combined = 0;
1749 sd_event_source *s;
1750
1751 assert(d);
1752
1753 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1754 * the IN_EXCL_UNLINK flag is ANDed instead.
1755 *
1756 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1757 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1758 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1759 * events we don't care for client-side. */
1760
1761 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1762
1763 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1764 excl_unlink = false;
1765
1766 combined |= s->inotify.mask;
1767 }
1768
1769 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1770 }
1771
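/* Installs or updates the inotify watch for an inode so that it matches the combined mask of all event
 * sources subscribed to it. Returns > 0 if the watch was (re)installed, 0 if it was already up to date. */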
1772 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1773 uint32_t combined_mask;
1774 int wd, r;
1775
1776 assert(d);
1777 assert(d->fd >= 0);
1778
1779 combined_mask = inode_data_determine_mask(d);
1780
1781 if (d->wd >= 0 && combined_mask == d->combined_mask)
1782 return 0;
1783
1784 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1785 if (r < 0)
1786 return r;
1787
1788 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1789 if (wd < 0)
1790 return -errno;
1791
1792 if (d->wd < 0) {
1793 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1794 if (r < 0) {
1795 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1796 return r;
1797 }
1798
1799 d->wd = wd;
1800
1801 } else if (d->wd != wd) {
1802
1803 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1804 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1805 return -EINVAL;
1806 }
1807
1808 d->combined_mask = combined_mask;
1809 return 1;
1810 }
1811
1812 _public_ int sd_event_add_inotify(
1813 sd_event *e,
1814 sd_event_source **ret,
1815 const char *path,
1816 uint32_t mask,
1817 sd_event_inotify_handler_t callback,
1818 void *userdata) {
1819
1820 bool rm_inotify = false, rm_inode = false;
1821 struct inotify_data *inotify_data = NULL;
1822 struct inode_data *inode_data = NULL;
1823 _cleanup_close_ int fd = -1;
1824 sd_event_source *s;
1825 struct stat st;
1826 int r;
1827
1828 assert_return(e, -EINVAL);
1829 assert_return(e = event_resolve(e), -ENOPKG);
1830 assert_return(path, -EINVAL);
1831 assert_return(callback, -EINVAL);
1832 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1833 assert_return(!event_pid_changed(e), -ECHILD);
1834
1835 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1836 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1837 * the flag is reserved for our own use and callers may not pass it. */
1838 if (mask & IN_MASK_ADD)
1839 return -EINVAL;
1840
1841 fd = open(path, O_PATH|O_CLOEXEC|
1842 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1843 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1844 if (fd < 0)
1845 return -errno;
1846
1847 if (fstat(fd, &st) < 0)
1848 return -errno;
1849
1850 s = source_new(e, !ret, SOURCE_INOTIFY);
1851 if (!s)
1852 return -ENOMEM;
1853
1854 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1855 s->inotify.mask = mask;
1856 s->inotify.callback = callback;
1857 s->userdata = userdata;
1858
1859 /* Allocate an inotify object for this priority, and an inode object within it */
1860 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1861 if (r < 0)
1862 goto fail;
1863 rm_inotify = r > 0;
1864
1865 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1866 if (r < 0)
1867 goto fail;
1868 rm_inode = r > 0;
1869
1870 /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the event
1871 * source can still be changed until then, for which we need the original inode. */
1872 if (inode_data->fd < 0) {
1873 inode_data->fd = TAKE_FD(fd);
1874 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1875 }
1876
1877 /* Link our event source to the inode data object */
1878 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1879 s->inotify.inode_data = inode_data;
1880
1881 rm_inode = rm_inotify = false;
1882
1883 /* Actually realize the watch now */
1884 r = inode_data_realize_watch(e, inode_data);
1885 if (r < 0)
1886 goto fail;
1887
1888 (void) sd_event_source_set_description(s, path);
1889
1890 if (ret)
1891 *ret = s;
1892
1893 return 0;
1894
1895 fail:
1896 source_free(s);
1897
1898 if (rm_inode)
1899 event_free_inode_data(e, inode_data);
1900
1901 if (rm_inotify)
1902 event_free_inotify_data(e, inotify_data);
1903
1904 return r;
1905 }
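/* A minimal usage sketch for the call above, assuming an existing loop "e"; the watched path and the
 * handler name are hypothetical. Note that watches on the same inode are coalesced into a single watch
 * descriptor per priority by event_make_inotify_data()/inode_data_realize_watch() above:
 *
 *     static int my_inotify_handler(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify event with mask 0x%" PRIx32 " for '%s'",
 *                       ev->mask, ev->len > 0 ? ev->name : "");
 *             return 0;
 *     }
 *
 *     int r = sd_event_add_inotify(e, NULL, "/run/hypothetical/dir",
 *                                  IN_CREATE|IN_DELETE|IN_MOVED_TO, my_inotify_handler, NULL);
 *     if (r < 0)
 *             return r;
 */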
1906
1907 static sd_event_source* event_source_free(sd_event_source *s) {
1908 if (!s)
1909 return NULL;
1910
1911 /* Here's a special hack: when we are called from a
1912 * dispatch handler we won't free the event source
1913 * immediately, but we will detach the fd from the
1914 * epoll. This way it is safe for the caller to unref
1915 * the event source and immediately close the fd, but
1916 * we still retain a valid event source object after
1917 * the callback. */
1918
1919 if (s->dispatching) {
1920 if (s->type == SOURCE_IO)
1921 source_io_unregister(s);
1922
1923 source_disconnect(s);
1924 } else
1925 source_free(s);
1926
1927 return NULL;
1928 }
1929
1930 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1931
1932 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1933 assert_return(s, -EINVAL);
1934 assert_return(!event_pid_changed(s->event), -ECHILD);
1935
1936 return free_and_strdup(&s->description, description);
1937 }
1938
1939 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1940 assert_return(s, -EINVAL);
1941 assert_return(description, -EINVAL);
1942 assert_return(s->description, -ENXIO);
1943 assert_return(!event_pid_changed(s->event), -ECHILD);
1944
1945 *description = s->description;
1946 return 0;
1947 }
1948
1949 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1950 assert_return(s, NULL);
1951
1952 return s->event;
1953 }
1954
1955 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1956 assert_return(s, -EINVAL);
1957 assert_return(s->type != SOURCE_EXIT, -EDOM);
1958 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1959 assert_return(!event_pid_changed(s->event), -ECHILD);
1960
1961 return s->pending;
1962 }
1963
1964 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1965 assert_return(s, -EINVAL);
1966 assert_return(s->type == SOURCE_IO, -EDOM);
1967 assert_return(!event_pid_changed(s->event), -ECHILD);
1968
1969 return s->io.fd;
1970 }
1971
1972 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1973 int r;
1974
1975 assert_return(s, -EINVAL);
1976 assert_return(fd >= 0, -EBADF);
1977 assert_return(s->type == SOURCE_IO, -EDOM);
1978 assert_return(!event_pid_changed(s->event), -ECHILD);
1979
1980 if (s->io.fd == fd)
1981 return 0;
1982
1983 if (s->enabled == SD_EVENT_OFF) {
1984 s->io.fd = fd;
1985 s->io.registered = false;
1986 } else {
1987 int saved_fd;
1988
1989 saved_fd = s->io.fd;
1990 assert(s->io.registered);
1991
1992 s->io.fd = fd;
1993 s->io.registered = false;
1994
1995 r = source_io_register(s, s->enabled, s->io.events);
1996 if (r < 0) {
1997 s->io.fd = saved_fd;
1998 s->io.registered = true;
1999 return r;
2000 }
2001
2002 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2003 }
2004
2005 return 0;
2006 }
2007
2008 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2009 assert_return(s, -EINVAL);
2010 assert_return(s->type == SOURCE_IO, -EDOM);
2011
2012 return s->io.owned;
2013 }
2014
2015 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2016 assert_return(s, -EINVAL);
2017 assert_return(s->type == SOURCE_IO, -EDOM);
2018
2019 s->io.owned = own;
2020 return 0;
2021 }
2022
2023 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2024 assert_return(s, -EINVAL);
2025 assert_return(events, -EINVAL);
2026 assert_return(s->type == SOURCE_IO, -EDOM);
2027 assert_return(!event_pid_changed(s->event), -ECHILD);
2028
2029 *events = s->io.events;
2030 return 0;
2031 }
2032
2033 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2034 int r;
2035
2036 assert_return(s, -EINVAL);
2037 assert_return(s->type == SOURCE_IO, -EDOM);
2038 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2039 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2040 assert_return(!event_pid_changed(s->event), -ECHILD);
2041
2042 /* edge-triggered updates are never skipped, so we can reset edges */
2043 if (s->io.events == events && !(events & EPOLLET))
2044 return 0;
2045
2046 r = source_set_pending(s, false);
2047 if (r < 0)
2048 return r;
2049
2050 if (s->enabled != SD_EVENT_OFF) {
2051 r = source_io_register(s, s->enabled, events);
2052 if (r < 0)
2053 return r;
2054 }
2055
2056 s->io.events = events;
2057
2058 return 0;
2059 }
2060
2061 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2062 assert_return(s, -EINVAL);
2063 assert_return(revents, -EINVAL);
2064 assert_return(s->type == SOURCE_IO, -EDOM);
2065 assert_return(s->pending, -ENODATA);
2066 assert_return(!event_pid_changed(s->event), -ECHILD);
2067
2068 *revents = s->io.revents;
2069 return 0;
2070 }
2071
2072 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2073 assert_return(s, -EINVAL);
2074 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2075 assert_return(!event_pid_changed(s->event), -ECHILD);
2076
2077 return s->signal.sig;
2078 }
2079
2080 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2081 assert_return(s, -EINVAL);
2082 assert_return(!event_pid_changed(s->event), -ECHILD);
2083
2084 *priority = s->priority;
2085 return 0;
2086 }
2087
2088 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2089 bool rm_inotify = false, rm_inode = false;
2090 struct inotify_data *new_inotify_data = NULL;
2091 struct inode_data *new_inode_data = NULL;
2092 int r;
2093
2094 assert_return(s, -EINVAL);
2095 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2096 assert_return(!event_pid_changed(s->event), -ECHILD);
2097
2098 if (s->priority == priority)
2099 return 0;
2100
2101 if (s->type == SOURCE_INOTIFY) {
2102 struct inode_data *old_inode_data;
2103
2104 assert(s->inotify.inode_data);
2105 old_inode_data = s->inotify.inode_data;
2106
2107 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2108 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2109 * events we allow priority changes only until the first following iteration. */
2110 if (old_inode_data->fd < 0)
2111 return -EOPNOTSUPP;
2112
2113 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2114 if (r < 0)
2115 return r;
2116 rm_inotify = r > 0;
2117
2118 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2119 if (r < 0)
2120 goto fail;
2121 rm_inode = r > 0;
2122
2123 if (new_inode_data->fd < 0) {
2124 /* Duplicate the fd for the new inode object if we don't have any yet */
2125 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2126 if (new_inode_data->fd < 0) {
2127 r = -errno;
2128 goto fail;
2129 }
2130
2131 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2132 }
2133
2134 /* Move the event source to the new inode data structure */
2135 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2136 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2137 s->inotify.inode_data = new_inode_data;
2138
2139 /* Now create the new watch */
2140 r = inode_data_realize_watch(s->event, new_inode_data);
2141 if (r < 0) {
2142 /* Move it back */
2143 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2144 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2145 s->inotify.inode_data = old_inode_data;
2146 goto fail;
2147 }
2148
2149 s->priority = priority;
2150
2151 event_gc_inode_data(s->event, old_inode_data);
2152
2153 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2154 struct signal_data *old, *d;
2155
2156 /* Move us from the signalfd belonging to the old
2157 * priority to the signalfd of the new priority */
2158
2159 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2160
2161 s->priority = priority;
2162
2163 r = event_make_signal_data(s->event, s->signal.sig, &d);
2164 if (r < 0) {
2165 s->priority = old->priority;
2166 return r;
2167 }
2168
2169 event_unmask_signal_data(s->event, old, s->signal.sig);
2170 } else
2171 s->priority = priority;
2172
2173 if (s->pending)
2174 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2175
2176 if (s->prepare)
2177 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2178
2179 if (s->type == SOURCE_EXIT)
2180 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2181
2182 return 0;
2183
2184 fail:
2185 if (rm_inode)
2186 event_free_inode_data(s->event, new_inode_data);
2187
2188 if (rm_inotify)
2189 event_free_inotify_data(s->event, new_inotify_data);
2190
2191 return r;
2192 }
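
/* Editor's note: a minimal illustrative sketch (not part of sd-event.c) of how a caller
 * might use the priority API above. Numerically lower priorities are dispatched first;
 * SD_EVENT_PRIORITY_IDLE pushes a source behind default SD_EVENT_PRIORITY_NORMAL work. */
static int example_demote_source(sd_event_source *s) {
        int64_t p;
        int r;

        r = sd_event_source_get_priority(s, &p);
        if (r < 0)
                return r;

        /* Only demote sources still running at the default priority */
        if (p != SD_EVENT_PRIORITY_NORMAL)
                return 0;

        return sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
}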
2193
2194 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2195 assert_return(s, -EINVAL);
2196 assert_return(m, -EINVAL);
2197 assert_return(!event_pid_changed(s->event), -ECHILD);
2198
2199 *m = s->enabled;
2200 return 0;
2201 }
2202
2203 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2204 int r;
2205
2206 assert_return(s, -EINVAL);
2207 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2208 assert_return(!event_pid_changed(s->event), -ECHILD);
2209
2210 /* If we are dead anyway, we are fine with turning off
2211 * sources, but everything else needs to fail. */
2212 if (s->event->state == SD_EVENT_FINISHED)
2213 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2214
2215 if (s->enabled == m)
2216 return 0;
2217
2218 if (m == SD_EVENT_OFF) {
2219
2220 /* Unset the pending flag when this event source is disabled */
2221 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2222 r = source_set_pending(s, false);
2223 if (r < 0)
2224 return r;
2225 }
2226
2227 switch (s->type) {
2228
2229 case SOURCE_IO:
2230 source_io_unregister(s);
2231 s->enabled = m;
2232 break;
2233
2234 case SOURCE_TIME_REALTIME:
2235 case SOURCE_TIME_BOOTTIME:
2236 case SOURCE_TIME_MONOTONIC:
2237 case SOURCE_TIME_REALTIME_ALARM:
2238 case SOURCE_TIME_BOOTTIME_ALARM: {
2239 struct clock_data *d;
2240
2241 s->enabled = m;
2242 d = event_get_clock_data(s->event, s->type);
2243 assert(d);
2244
2245 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2246 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2247 d->needs_rearm = true;
2248 break;
2249 }
2250
2251 case SOURCE_SIGNAL:
2252 s->enabled = m;
2253
2254 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2255 break;
2256
2257 case SOURCE_CHILD:
2258 s->enabled = m;
2259
2260 assert(s->event->n_enabled_child_sources > 0);
2261 s->event->n_enabled_child_sources--;
2262
2263 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2264 break;
2265
2266 case SOURCE_EXIT:
2267 s->enabled = m;
2268 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2269 break;
2270
2271 case SOURCE_DEFER:
2272 case SOURCE_POST:
2273 case SOURCE_INOTIFY:
2274 s->enabled = m;
2275 break;
2276
2277 default:
2278 assert_not_reached("Wut? I shouldn't exist.");
2279 }
2280
2281 } else {
2282
2283 /* Unset the pending flag when this event source is enabled */
2284 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2285 r = source_set_pending(s, false);
2286 if (r < 0)
2287 return r;
2288 }
2289
2290 switch (s->type) {
2291
2292 case SOURCE_IO:
2293 r = source_io_register(s, m, s->io.events);
2294 if (r < 0)
2295 return r;
2296
2297 s->enabled = m;
2298 break;
2299
2300 case SOURCE_TIME_REALTIME:
2301 case SOURCE_TIME_BOOTTIME:
2302 case SOURCE_TIME_MONOTONIC:
2303 case SOURCE_TIME_REALTIME_ALARM:
2304 case SOURCE_TIME_BOOTTIME_ALARM: {
2305 struct clock_data *d;
2306
2307 s->enabled = m;
2308 d = event_get_clock_data(s->event, s->type);
2309 assert(d);
2310
2311 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2312 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2313 d->needs_rearm = true;
2314 break;
2315 }
2316
2317 case SOURCE_SIGNAL:
2318
2319 s->enabled = m;
2320
2321 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2322 if (r < 0) {
2323 s->enabled = SD_EVENT_OFF;
2324 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2325 return r;
2326 }
2327
2328 break;
2329
2330 case SOURCE_CHILD:
2331
2332 if (s->enabled == SD_EVENT_OFF)
2333 s->event->n_enabled_child_sources++;
2334
2335 s->enabled = m;
2336
2337 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2338 if (r < 0) {
2339 s->enabled = SD_EVENT_OFF;
2340 s->event->n_enabled_child_sources--;
2341 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2342 return r;
2343 }
2344
2345 break;
2346
2347 case SOURCE_EXIT:
2348 s->enabled = m;
2349 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2350 break;
2351
2352 case SOURCE_DEFER:
2353 case SOURCE_POST:
2354 case SOURCE_INOTIFY:
2355 s->enabled = m;
2356 break;
2357
2358 default:
2359 assert_not_reached("Wut? I shouldn't exist.");
2360 }
2361 }
2362
2363 if (s->pending)
2364 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2365
2366 if (s->prepare)
2367 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2368
2369 return 0;
2370 }
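
/* Editor's note: illustrative sketch (not part of sd-event.c). A common use of the
 * enable/disable API above is an I/O handler that stops watching its fd once the peer
 * hangs up, while keeping the source object itself around for later reuse. */
static int example_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
        if (revents & (EPOLLHUP|EPOLLERR))
                /* Stop polling this fd, but don't free the source */
                return sd_event_source_set_enabled(s, SD_EVENT_OFF);

        /* ... otherwise read from fd and process the data here ... */
        return 0;
}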
2371
2372 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2373 assert_return(s, -EINVAL);
2374 assert_return(usec, -EINVAL);
2375 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2376 assert_return(!event_pid_changed(s->event), -ECHILD);
2377
2378 *usec = s->time.next;
2379 return 0;
2380 }
2381
2382 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2383 struct clock_data *d;
2384 int r;
2385
2386 assert_return(s, -EINVAL);
2387 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2388 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2389 assert_return(!event_pid_changed(s->event), -ECHILD);
2390
2391 r = source_set_pending(s, false);
2392 if (r < 0)
2393 return r;
2394
2395 s->time.next = usec;
2396
2397 d = event_get_clock_data(s->event, s->type);
2398 assert(d);
2399
2400 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2401 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2402 d->needs_rearm = true;
2403
2404 return 0;
2405 }
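
/* Editor's note: illustrative sketch (not part of sd-event.c). Time sources take absolute
 * timestamps, so a repeating timer is built by re-arming from inside the callback: compute
 * the next deadline from the loop's cached clock and flip the source back to ONESHOT. */
static int example_repeating_timer(sd_event_source *s, uint64_t usec, void *userdata) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(sd_event_source_get_event(s), CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        r = sd_event_source_set_time(s, now_usec + USEC_PER_SEC);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
}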
2406
2407 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2408 assert_return(s, -EINVAL);
2409 assert_return(usec, -EINVAL);
2410 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2411 assert_return(!event_pid_changed(s->event), -ECHILD);
2412
2413 *usec = s->time.accuracy;
2414 return 0;
2415 }
2416
2417 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2418 struct clock_data *d;
2419 int r;
2420
2421 assert_return(s, -EINVAL);
2422 assert_return(usec != (uint64_t) -1, -EINVAL);
2423 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2424 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2425 assert_return(!event_pid_changed(s->event), -ECHILD);
2426
2427 r = source_set_pending(s, false);
2428 if (r < 0)
2429 return r;
2430
2431 if (usec == 0)
2432 usec = DEFAULT_ACCURACY_USEC;
2433
2434 s->time.accuracy = usec;
2435
2436 d = event_get_clock_data(s->event, s->type);
2437 assert(d);
2438
2439 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2440 d->needs_rearm = true;
2441
2442 return 0;
2443 }
2444
2445 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2446 assert_return(s, -EINVAL);
2447 assert_return(clock, -EINVAL);
2448 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2449 assert_return(!event_pid_changed(s->event), -ECHILD);
2450
2451 *clock = event_source_type_to_clock(s->type);
2452 return 0;
2453 }
2454
2455 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2456 assert_return(s, -EINVAL);
2457 assert_return(pid, -EINVAL);
2458 assert_return(s->type == SOURCE_CHILD, -EDOM);
2459 assert_return(!event_pid_changed(s->event), -ECHILD);
2460
2461 *pid = s->child.pid;
2462 return 0;
2463 }
2464
2465 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2466 assert_return(s, -EINVAL);
2467 assert_return(mask, -EINVAL);
2468 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2469 assert_return(!event_pid_changed(s->event), -ECHILD);
2470
2471 *mask = s->inotify.mask;
2472 return 0;
2473 }
2474
2475 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2476 int r;
2477
2478 assert_return(s, -EINVAL);
2479 assert_return(s->type != SOURCE_EXIT, -EDOM);
2480 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2481 assert_return(!event_pid_changed(s->event), -ECHILD);
2482
2483 if (s->prepare == callback)
2484 return 0;
2485
2486 if (callback && s->prepare) {
2487 s->prepare = callback;
2488 return 0;
2489 }
2490
2491 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2492 if (r < 0)
2493 return r;
2494
2495 s->prepare = callback;
2496
2497 if (callback) {
2498 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2499 if (r < 0)
2500 return r;
2501 } else
2502 prioq_remove(s->event->prepare, s, &s->prepare_index);
2503
2504 return 0;
2505 }
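
/* Editor's note: illustrative sketch (not part of sd-event.c). A prepare callback runs in
 * sd_event_prepare(), right before the loop polls, which makes it a good place to bring an
 * event source in sync with external state, here a flag supplied as userdata. */
static int example_prepare(sd_event_source *s, void *userdata) {
        bool *have_output = userdata;

        /* Only poll for writability while there is actually output queued */
        return sd_event_source_set_io_events(s, *have_output ? EPOLLIN|EPOLLOUT : EPOLLIN);
}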
2506
2507 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2508 assert_return(s, NULL);
2509
2510 return s->userdata;
2511 }
2512
2513 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2514 void *ret;
2515
2516 assert_return(s, NULL);
2517
2518 ret = s->userdata;
2519 s->userdata = userdata;
2520
2521 return ret;
2522 }
2523
2524 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2525 usec_t c;
2526 assert(e);
2527 assert(a <= b);
2528
2529 if (a <= 0)
2530 return 0;
2531 if (a >= USEC_INFINITY)
2532 return USEC_INFINITY;
2533
2534 if (b <= a + 1)
2535 return a;
2536
2537 initialize_perturb(e);
2538
2539 /*
2540 Find a good time to wake up again between times a and b. We
2541 have two goals here:
2542
2543 a) We want to wake up as seldom as possible, hence prefer
2544 later times over earlier times.
2545
2546 b) But if we have to wake up, then let's make sure to
2547 dispatch as much as possible on the entire system.
2548
2549 We implement this by waking up everywhere at the same time
2550 within any given minute if we can, synchronised via the
2551 perturbation value determined from the boot ID. If we can't,
2552 then we try to find the same spot within every 10s window,
2553 then within every 1s and finally every 250ms window. Otherwise,
2554 we pick the last possible time to wake up.
2555 */
2556
2557 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2558 if (c >= b) {
2559 if (_unlikely_(c < USEC_PER_MINUTE))
2560 return b;
2561
2562 c -= USEC_PER_MINUTE;
2563 }
2564
2565 if (c >= a)
2566 return c;
2567
2568 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2569 if (c >= b) {
2570 if (_unlikely_(c < USEC_PER_SEC*10))
2571 return b;
2572
2573 c -= USEC_PER_SEC*10;
2574 }
2575
2576 if (c >= a)
2577 return c;
2578
2579 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2580 if (c >= b) {
2581 if (_unlikely_(c < USEC_PER_SEC))
2582 return b;
2583
2584 c -= USEC_PER_SEC;
2585 }
2586
2587 if (c >= a)
2588 return c;
2589
2590 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2591 if (c >= b) {
2592 if (_unlikely_(c < USEC_PER_MSEC*250))
2593 return b;
2594
2595 c -= USEC_PER_MSEC*250;
2596 }
2597
2598 if (c >= a)
2599 return c;
2600
2601 return b;
2602 }
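
/* Editor's note: a small worked example of the coalescing above (not part of sd-event.c).
 * Suppose the per-boot perturbation works out to 7s past the full minute, and a timer may
 * fire anywhere between a = 100s and b = 130s on its clock. The first grid rounds b down to
 * the minute and adds the perturbation: c = 120s + 7s = 127s. Since a <= 127s < b, the loop
 * wakes at second 127, and every other loop on this machine with such a window does the
 * same, so their wakeups coalesce. Only if c fell outside [a, b] would the 10s, 1s and
 * 250ms grids be tried, with b itself as the final fallback. */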
2603
2604 static int event_arm_timer(
2605 sd_event *e,
2606 struct clock_data *d) {
2607
2608 struct itimerspec its = {};
2609 sd_event_source *a, *b;
2610 usec_t t;
2611 int r;
2612
2613 assert(e);
2614 assert(d);
2615
2616 if (!d->needs_rearm)
2617 return 0;
2618 else
2619 d->needs_rearm = false;
2620
2621 a = prioq_peek(d->earliest);
2622 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2623
2624 if (d->fd < 0)
2625 return 0;
2626
2627 if (d->next == USEC_INFINITY)
2628 return 0;
2629
2630 /* disarm */
2631 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2632 if (r < 0)
2633 return -errno;
2634
2635 d->next = USEC_INFINITY;
2636 return 0;
2637 }
2638
2639 b = prioq_peek(d->latest);
2640 assert_se(b && b->enabled != SD_EVENT_OFF);
2641
2642 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2643 if (d->next == t)
2644 return 0;
2645
2646 assert_se(d->fd >= 0);
2647
2648 if (t == 0) {
2649 /* We don't want to disarm here, we just want to indicate some time long in the past. */
2650 its.it_value.tv_sec = 0;
2651 its.it_value.tv_nsec = 1;
2652 } else
2653 timespec_store(&its.it_value, t);
2654
2655 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2656 if (r < 0)
2657 return -errno;
2658
2659 d->next = t;
2660 return 0;
2661 }
2662
2663 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2664 assert(e);
2665 assert(s);
2666 assert(s->type == SOURCE_IO);
2667
2668 /* If the event source was already pending, we just OR in the
2669 * new revents, otherwise we reset the value. The ORing is
2670 * necessary to handle EPOLLONESHOT events properly where
2671 * readability might happen independently of writability, and
2672 * we need to keep track of both */
2673
2674 if (s->pending)
2675 s->io.revents |= revents;
2676 else
2677 s->io.revents = revents;
2678
2679 return source_set_pending(s, true);
2680 }
2681
2682 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2683 uint64_t x;
2684 ssize_t ss;
2685
2686 assert(e);
2687 assert(fd >= 0);
2688
2689 assert_return(events == EPOLLIN, -EIO);
2690
2691 ss = read(fd, &x, sizeof(x));
2692 if (ss < 0) {
2693 if (IN_SET(errno, EAGAIN, EINTR))
2694 return 0;
2695
2696 return -errno;
2697 }
2698
2699 if (_unlikely_(ss != sizeof(x)))
2700 return -EIO;
2701
2702 if (next)
2703 *next = USEC_INFINITY;
2704
2705 return 0;
2706 }
2707
2708 static int process_timer(
2709 sd_event *e,
2710 usec_t n,
2711 struct clock_data *d) {
2712
2713 sd_event_source *s;
2714 int r;
2715
2716 assert(e);
2717 assert(d);
2718
2719 for (;;) {
2720 s = prioq_peek(d->earliest);
2721 if (!s ||
2722 s->time.next > n ||
2723 s->enabled == SD_EVENT_OFF ||
2724 s->pending)
2725 break;
2726
2727 r = source_set_pending(s, true);
2728 if (r < 0)
2729 return r;
2730
2731 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2732 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2733 d->needs_rearm = true;
2734 }
2735
2736 return 0;
2737 }
2738
2739 static int process_child(sd_event *e) {
2740 sd_event_source *s;
2741 Iterator i;
2742 int r;
2743
2744 assert(e);
2745
2746 e->need_process_child = false;
2747
2748 /*
2749 So, this is ugly. We iteratively invoke waitid() with P_PID
2750 + WNOHANG for each PID we wait for, instead of using
2751 P_ALL. This is because we only want to get child
2752 information of very specific child processes, and not all
2753 of them. We might not have processed the SIGCHLD even of a
2754 previous invocation and we don't want to maintain a
2755 unbounded *per-child* event queue, hence we really don't
2756 want anything flushed out of the kernel's queue that we
2757 don't care about. Since this is O(n) this means that if you
2758 have a lot of processes you probably want to handle SIGCHLD
2759 yourself.
2760
2761 We do not reap the children here (by using WNOWAIT), this
2762 is only done after the event source is dispatched so that
2763 the callback still sees the process as a zombie.
2764 */
2765
2766 HASHMAP_FOREACH(s, e->child_sources, i) {
2767 assert(s->type == SOURCE_CHILD);
2768
2769 if (s->pending)
2770 continue;
2771
2772 if (s->enabled == SD_EVENT_OFF)
2773 continue;
2774
2775 zero(s->child.siginfo);
2776 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2777 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2778 if (r < 0)
2779 return -errno;
2780
2781 if (s->child.siginfo.si_pid != 0) {
2782 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2783
2784 if (!zombie && (s->child.options & WEXITED)) {
2785 /* If the child isn't dead then let's
2786 * immediately remove the state change
2787 * from the queue, since there's no
2788 * benefit in leaving it queued */
2789
2790 assert(s->child.options & (WSTOPPED|WCONTINUED));
2791 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2792 }
2793
2794 r = source_set_pending(s, true);
2795 if (r < 0)
2796 return r;
2797 }
2798 }
2799
2800 return 0;
2801 }
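
/* Editor's note: illustrative sketch (not part of sd-event.c) of the caller side of the
 * child handling above: SIGCHLD must be blocked before sd_event_add_child() is called, with
 * WEXITED in the options to be notified about process exit. Because the loop uses WNOWAIT,
 * the callback still sees the child as a zombie; it is reaped afterwards in source_dispatch(). */
static int example_on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
        /* si->si_status carries the exit status (or terminating signal) */
        return sd_event_exit(sd_event_source_get_event(s), si->si_status);
}

static int example_watch_child(sd_event *e, pid_t pid) {
        sigset_t ss;

        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGCHLD) >= 0);
        assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);

        /* NULL source slot: the source floats and stays pinned by the event loop */
        return sd_event_add_child(e, NULL, pid, WEXITED, example_on_child_exit, NULL);
}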
2802
2803 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2804 bool read_one = false;
2805 int r;
2806
2807 assert(e);
2808 assert(d);
2809 assert_return(events == EPOLLIN, -EIO);
2810
2811 /* If there's a signal queued on this priority and SIGCHLD is
2812 on this priority too, then make sure to recheck the
2813 children we watch. This is because we only ever dequeue
2814 the first signal per priority, so if we dequeue one, a
2815 SIGCHLD queued behind it would go unnoticed, even though we
2816 might have higher-priority children we care about. Hence we
2817 need to check for them explicitly. */
2818
2819 if (sigismember(&d->sigset, SIGCHLD))
2820 e->need_process_child = true;
2821
2822 /* If there's already an event source pending for this
2823 * priority we don't read another */
2824 if (d->current)
2825 return 0;
2826
2827 for (;;) {
2828 struct signalfd_siginfo si;
2829 ssize_t n;
2830 sd_event_source *s = NULL;
2831
2832 n = read(d->fd, &si, sizeof(si));
2833 if (n < 0) {
2834 if (IN_SET(errno, EAGAIN, EINTR))
2835 return read_one;
2836
2837 return -errno;
2838 }
2839
2840 if (_unlikely_(n != sizeof(si)))
2841 return -EIO;
2842
2843 assert(SIGNAL_VALID(si.ssi_signo));
2844
2845 read_one = true;
2846
2847 if (e->signal_sources)
2848 s = e->signal_sources[si.ssi_signo];
2849 if (!s)
2850 continue;
2851 if (s->pending)
2852 continue;
2853
2854 s->signal.siginfo = si;
2855 d->current = s;
2856
2857 r = source_set_pending(s, true);
2858 if (r < 0)
2859 return r;
2860
2861 return 1;
2862 }
2863 }
2864
2865 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2866 ssize_t n;
2867
2868 assert(e);
2869 assert(d);
2870
2871 assert_return(revents == EPOLLIN, -EIO);
2872
2873 /* If there's already an event source pending for this priority, don't read another */
2874 if (d->n_pending > 0)
2875 return 0;
2876
2877 /* Is the read buffer non-empty? If so, let's not read more */
2878 if (d->buffer_filled > 0)
2879 return 0;
2880
2881 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2882 if (n < 0) {
2883 if (IN_SET(errno, EAGAIN, EINTR))
2884 return 0;
2885
2886 return -errno;
2887 }
2888
2889 assert(n > 0);
2890 d->buffer_filled = (size_t) n;
2891 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2892
2893 return 1;
2894 }
2895
2896 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2897 assert(e);
2898 assert(d);
2899 assert(sz <= d->buffer_filled);
2900
2901 if (sz == 0)
2902 return;
2903
2904 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2905 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2906 d->buffer_filled -= sz;
2907
2908 if (d->buffer_filled == 0)
2909 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2910 }
2911
2912 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2913 int r;
2914
2915 assert(e);
2916 assert(d);
2917
2918 /* If there's already an event source pending for this priority, don't read another */
2919 if (d->n_pending > 0)
2920 return 0;
2921
2922 while (d->buffer_filled > 0) {
2923 size_t sz;
2924
2925 /* Let's validate that the event structures are complete */
2926 if (d->buffer_filled < offsetof(struct inotify_event, name))
2927 return -EIO;
2928
2929 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2930 if (d->buffer_filled < sz)
2931 return -EIO;
2932
2933 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2934 struct inode_data *inode_data;
2935 Iterator i;
2936
2937 /* The queue overran, let's pass this event to all event sources connected to this inotify
2938 * object */
2939
2940 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2941 sd_event_source *s;
2942
2943 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2944
2945 if (s->enabled == SD_EVENT_OFF)
2946 continue;
2947
2948 r = source_set_pending(s, true);
2949 if (r < 0)
2950 return r;
2951 }
2952 }
2953 } else {
2954 struct inode_data *inode_data;
2955 sd_event_source *s;
2956
2957 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2958 * our watch descriptor table. */
2959 if (d->buffer.ev.mask & IN_IGNORED) {
2960
2961 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2962 if (!inode_data) {
2963 event_inotify_data_drop(e, d, sz);
2964 continue;
2965 }
2966
2967 /* The watch descriptor was removed by the kernel, let's drop it here too */
2968 inode_data->wd = -1;
2969 } else {
2970 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2971 if (!inode_data) {
2972 event_inotify_data_drop(e, d, sz);
2973 continue;
2974 }
2975 }
2976
2977 /* Trigger all event sources that are interested in these events. Also trigger all event
2978 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2979 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2980
2981 if (s->enabled == SD_EVENT_OFF)
2982 continue;
2983
2984 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2985 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2986 continue;
2987
2988 r = source_set_pending(s, true);
2989 if (r < 0)
2990 return r;
2991 }
2992 }
2993
2994 /* Something pending now? If so, let's finish, otherwise let's read more. */
2995 if (d->n_pending > 0)
2996 return 1;
2997 }
2998
2999 return 0;
3000 }
3001
3002 static int process_inotify(sd_event *e) {
3003 struct inotify_data *d;
3004 int r, done = 0;
3005
3006 assert(e);
3007
3008 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3009 r = event_inotify_data_process(e, d);
3010 if (r < 0)
3011 return r;
3012 if (r > 0)
3013 done++;
3014 }
3015
3016 return done;
3017 }
3018
3019 static int source_dispatch(sd_event_source *s) {
3020 EventSourceType saved_type;
3021 int r = 0;
3022
3023 assert(s);
3024 assert(s->pending || s->type == SOURCE_EXIT);
3025
3026 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3027 * the event. */
3028 saved_type = s->type;
3029
3030 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3031 r = source_set_pending(s, false);
3032 if (r < 0)
3033 return r;
3034 }
3035
3036 if (s->type != SOURCE_POST) {
3037 sd_event_source *z;
3038 Iterator i;
3039
3040 /* If we execute a non-post source, let's mark all
3041 * post sources as pending */
3042
3043 SET_FOREACH(z, s->event->post_sources, i) {
3044 if (z->enabled == SD_EVENT_OFF)
3045 continue;
3046
3047 r = source_set_pending(z, true);
3048 if (r < 0)
3049 return r;
3050 }
3051 }
3052
3053 if (s->enabled == SD_EVENT_ONESHOT) {
3054 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3055 if (r < 0)
3056 return r;
3057 }
3058
3059 s->dispatching = true;
3060
3061 switch (s->type) {
3062
3063 case SOURCE_IO:
3064 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3065 break;
3066
3067 case SOURCE_TIME_REALTIME:
3068 case SOURCE_TIME_BOOTTIME:
3069 case SOURCE_TIME_MONOTONIC:
3070 case SOURCE_TIME_REALTIME_ALARM:
3071 case SOURCE_TIME_BOOTTIME_ALARM:
3072 r = s->time.callback(s, s->time.next, s->userdata);
3073 break;
3074
3075 case SOURCE_SIGNAL:
3076 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3077 break;
3078
3079 case SOURCE_CHILD: {
3080 bool zombie;
3081
3082 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3083
3084 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3085
3086 /* Now, reap the PID for good. */
3087 if (zombie)
3088 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3089
3090 break;
3091 }
3092
3093 case SOURCE_DEFER:
3094 r = s->defer.callback(s, s->userdata);
3095 break;
3096
3097 case SOURCE_POST:
3098 r = s->post.callback(s, s->userdata);
3099 break;
3100
3101 case SOURCE_EXIT:
3102 r = s->exit.callback(s, s->userdata);
3103 break;
3104
3105 case SOURCE_INOTIFY: {
3106 struct sd_event *e = s->event;
3107 struct inotify_data *d;
3108 size_t sz;
3109
3110 assert(s->inotify.inode_data);
3111 assert_se(d = s->inotify.inode_data->inotify_data);
3112
3113 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3114 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3115 assert(d->buffer_filled >= sz);
3116
3117 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3118
3119 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3120 * buffer. */
3121 if (d->n_pending == 0)
3122 event_inotify_data_drop(e, d, sz);
3123
3124 break;
3125 }
3126
3127 case SOURCE_WATCHDOG:
3128 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3129 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3130 assert_not_reached("Wut? I shouldn't exist.");
3131 }
3132
3133 s->dispatching = false;
3134
3135 if (r < 0)
3136 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3137 strna(s->description), event_source_type_to_string(saved_type));
3138
3139 if (s->n_ref == 0)
3140 source_free(s);
3141 else if (r < 0)
3142 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3143
3144 return 1;
3145 }
3146
3147 static int event_prepare(sd_event *e) {
3148 int r;
3149
3150 assert(e);
3151
3152 for (;;) {
3153 sd_event_source *s;
3154
3155 s = prioq_peek(e->prepare);
3156 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3157 break;
3158
3159 s->prepare_iteration = e->iteration;
3160 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3161 if (r < 0)
3162 return r;
3163
3164 assert(s->prepare);
3165
3166 s->dispatching = true;
3167 r = s->prepare(s, s->userdata);
3168 s->dispatching = false;
3169
3170 if (r < 0)
3171 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3172 strna(s->description), event_source_type_to_string(s->type));
3173
3174 if (s->n_ref == 0)
3175 source_free(s);
3176 else if (r < 0)
3177 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3178 }
3179
3180 return 0;
3181 }
3182
3183 static int dispatch_exit(sd_event *e) {
3184 sd_event_source *p;
3185 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3186 int r;
3187
3188 assert(e);
3189
3190 p = prioq_peek(e->exit);
3191 if (!p || p->enabled == SD_EVENT_OFF) {
3192 e->state = SD_EVENT_FINISHED;
3193 return 0;
3194 }
3195
3196 ref = sd_event_ref(e);
3197 e->iteration++;
3198 e->state = SD_EVENT_EXITING;
3199 r = source_dispatch(p);
3200 e->state = SD_EVENT_INITIAL;
3201 return r;
3202 }
3203
3204 static sd_event_source* event_next_pending(sd_event *e) {
3205 sd_event_source *p;
3206
3207 assert(e);
3208
3209 p = prioq_peek(e->pending);
3210 if (!p)
3211 return NULL;
3212
3213 if (p->enabled == SD_EVENT_OFF)
3214 return NULL;
3215
3216 return p;
3217 }
3218
3219 static int arm_watchdog(sd_event *e) {
3220 struct itimerspec its = {};
3221 usec_t t;
3222 int r;
3223
3224 assert(e);
3225 assert(e->watchdog_fd >= 0);
3226
3227 t = sleep_between(e,
3228 e->watchdog_last + (e->watchdog_period / 2),
3229 e->watchdog_last + (e->watchdog_period * 3 / 4));
3230
3231 timespec_store(&its.it_value, t);
3232
3233 /* Make sure we never set the watchdog to 0, which tells the
3234 * kernel to disable it. */
3235 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3236 its.it_value.tv_nsec = 1;
3237
3238 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3239 if (r < 0)
3240 return -errno;
3241
3242 return 0;
3243 }
3244
3245 static int process_watchdog(sd_event *e) {
3246 assert(e);
3247
3248 if (!e->watchdog)
3249 return 0;
3250
3251 /* Don't notify watchdog too often */
3252 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3253 return 0;
3254
3255 sd_notify(false, "WATCHDOG=1");
3256 e->watchdog_last = e->timestamp.monotonic;
3257
3258 return arm_watchdog(e);
3259 }
3260
3261 static void event_close_inode_data_fds(sd_event *e) {
3262 struct inode_data *d;
3263
3264 assert(e);
3265
3266 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3267 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3268 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3269 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3270 * compromise. */
3271
3272 while ((d = e->inode_data_to_close)) {
3273 assert(d->fd >= 0);
3274 d->fd = safe_close(d->fd);
3275
3276 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3277 }
3278 }
3279
3280 _public_ int sd_event_prepare(sd_event *e) {
3281 int r;
3282
3283 assert_return(e, -EINVAL);
3284 assert_return(e = event_resolve(e), -ENOPKG);
3285 assert_return(!event_pid_changed(e), -ECHILD);
3286 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3287 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3288
3289 if (e->exit_requested)
3290 goto pending;
3291
3292 e->iteration++;
3293
3294 e->state = SD_EVENT_PREPARING;
3295 r = event_prepare(e);
3296 e->state = SD_EVENT_INITIAL;
3297 if (r < 0)
3298 return r;
3299
3300 r = event_arm_timer(e, &e->realtime);
3301 if (r < 0)
3302 return r;
3303
3304 r = event_arm_timer(e, &e->boottime);
3305 if (r < 0)
3306 return r;
3307
3308 r = event_arm_timer(e, &e->monotonic);
3309 if (r < 0)
3310 return r;
3311
3312 r = event_arm_timer(e, &e->realtime_alarm);
3313 if (r < 0)
3314 return r;
3315
3316 r = event_arm_timer(e, &e->boottime_alarm);
3317 if (r < 0)
3318 return r;
3319
3320 event_close_inode_data_fds(e);
3321
3322 if (event_next_pending(e) || e->need_process_child)
3323 goto pending;
3324
3325 e->state = SD_EVENT_ARMED;
3326
3327 return 0;
3328
3329 pending:
3330 e->state = SD_EVENT_ARMED;
3331 r = sd_event_wait(e, 0);
3332 if (r == 0)
3333 e->state = SD_EVENT_ARMED;
3334
3335 return r;
3336 }
3337
3338 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3339 struct epoll_event *ev_queue;
3340 unsigned ev_queue_max;
3341 int r, m, i;
3342
3343 assert_return(e, -EINVAL);
3344 assert_return(e = event_resolve(e), -ENOPKG);
3345 assert_return(!event_pid_changed(e), -ECHILD);
3346 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3347 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3348
3349 if (e->exit_requested) {
3350 e->state = SD_EVENT_PENDING;
3351 return 1;
3352 }
3353
3354 ev_queue_max = MAX(e->n_sources, 1u);
3355 ev_queue = newa(struct epoll_event, ev_queue_max);
3356
3357 /* If we still have inotify data buffered, then query the other fds, but don't block waiting for them */
3358 if (e->inotify_data_buffered)
3359 timeout = 0;
3360
3361 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3362 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3363 if (m < 0) {
3364 if (errno == EINTR) {
3365 e->state = SD_EVENT_PENDING;
3366 return 1;
3367 }
3368
3369 r = -errno;
3370 goto finish;
3371 }
3372
3373 triple_timestamp_get(&e->timestamp);
3374
3375 for (i = 0; i < m; i++) {
3376
3377 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3378 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3379 else {
3380 WakeupType *t = ev_queue[i].data.ptr;
3381
3382 switch (*t) {
3383
3384 case WAKEUP_EVENT_SOURCE:
3385 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3386 break;
3387
3388 case WAKEUP_CLOCK_DATA: {
3389 struct clock_data *d = ev_queue[i].data.ptr;
3390 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3391 break;
3392 }
3393
3394 case WAKEUP_SIGNAL_DATA:
3395 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3396 break;
3397
3398 case WAKEUP_INOTIFY_DATA:
3399 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3400 break;
3401
3402 default:
3403 assert_not_reached("Invalid wake-up pointer");
3404 }
3405 }
3406 if (r < 0)
3407 goto finish;
3408 }
3409
3410 r = process_watchdog(e);
3411 if (r < 0)
3412 goto finish;
3413
3414 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3415 if (r < 0)
3416 goto finish;
3417
3418 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3419 if (r < 0)
3420 goto finish;
3421
3422 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3423 if (r < 0)
3424 goto finish;
3425
3426 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3427 if (r < 0)
3428 goto finish;
3429
3430 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3431 if (r < 0)
3432 goto finish;
3433
3434 if (e->need_process_child) {
3435 r = process_child(e);
3436 if (r < 0)
3437 goto finish;
3438 }
3439
3440 r = process_inotify(e);
3441 if (r < 0)
3442 goto finish;
3443
3444 if (event_next_pending(e)) {
3445 e->state = SD_EVENT_PENDING;
3446
3447 return 1;
3448 }
3449
3450 r = 0;
3451
3452 finish:
3453 e->state = SD_EVENT_INITIAL;
3454
3455 return r;
3456 }
3457
3458 _public_ int sd_event_dispatch(sd_event *e) {
3459 sd_event_source *p;
3460 int r;
3461
3462 assert_return(e, -EINVAL);
3463 assert_return(e = event_resolve(e), -ENOPKG);
3464 assert_return(!event_pid_changed(e), -ECHILD);
3465 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3466 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3467
3468 if (e->exit_requested)
3469 return dispatch_exit(e);
3470
3471 p = event_next_pending(e);
3472 if (p) {
3473 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3474
3475 ref = sd_event_ref(e);
3476 e->state = SD_EVENT_RUNNING;
3477 r = source_dispatch(p);
3478 e->state = SD_EVENT_INITIAL;
3479 return r;
3480 }
3481
3482 e->state = SD_EVENT_INITIAL;
3483
3484 return 1;
3485 }
3486
3487 static void event_log_delays(sd_event *e) {
3488 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3489 unsigned i;
3490 int o;
3491
3492 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3493 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3494 e->delays[i] = 0;
3495 }
3496 log_debug("Event loop iterations: %.*s", o, b);
3497 }
3498
3499 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3500 int r;
3501
3502 assert_return(e, -EINVAL);
3503 assert_return(e = event_resolve(e), -ENOPKG);
3504 assert_return(!event_pid_changed(e), -ECHILD);
3505 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3506 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3507
3508 if (e->profile_delays && e->last_run) {
3509 usec_t this_run;
3510 unsigned l;
3511
3512 this_run = now(CLOCK_MONOTONIC);
3513
3514 l = u64log2(this_run - e->last_run);
3515 assert(l < sizeof(e->delays));
3516 e->delays[l]++;
3517
3518 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3519 event_log_delays(e);
3520 e->last_log = this_run;
3521 }
3522 }
3523
3524 r = sd_event_prepare(e);
3525 if (r == 0)
3526 /* There was nothing? Then wait... */
3527 r = sd_event_wait(e, timeout);
3528
3529 if (e->profile_delays)
3530 e->last_run = now(CLOCK_MONOTONIC);
3531
3532 if (r > 0) {
3533 /* There's something now, so let's dispatch it */
3534 r = sd_event_dispatch(e);
3535 if (r < 0)
3536 return r;
3537
3538 return 1;
3539 }
3540
3541 return r;
3542 }
3543
3544 _public_ int sd_event_loop(sd_event *e) {
3545 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3546 int r;
3547
3548 assert_return(e, -EINVAL);
3549 assert_return(e = event_resolve(e), -ENOPKG);
3550 assert_return(!event_pid_changed(e), -ECHILD);
3551 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3552
3553 ref = sd_event_ref(e);
3554
3555 while (e->state != SD_EVENT_FINISHED) {
3556 r = sd_event_run(e, (uint64_t) -1);
3557 if (r < 0)
3558 return r;
3559 }
3560
3561 return e->exit_code;
3562 }
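
/* Editor's note: illustrative sketch (not part of sd-event.c) of the minimal end-to-end
 * pattern the calls above exist for: take the default loop, register a source, and hand
 * control to sd_event_loop() until something calls sd_event_exit(). Assumes <signal.h>. */
static int example_on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Ask the loop to finish; sd_event_loop() then returns this exit code */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int example_main_loop(void) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        sigset_t ss;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        /* sd_event_add_signal() requires the signal to be blocked already */
        assert_se(sigemptyset(&ss) >= 0);
        assert_se(sigaddset(&ss, SIGTERM) >= 0);
        assert_se(sigprocmask(SIG_BLOCK, &ss, NULL) >= 0);

        r = sd_event_add_signal(e, NULL, SIGTERM, example_on_sigterm, NULL);
        if (r < 0)
                return r;

        return sd_event_loop(e);
}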
3563
3564 _public_ int sd_event_get_fd(sd_event *e) {
3565
3566 assert_return(e, -EINVAL);
3567 assert_return(e = event_resolve(e), -ENOPKG);
3568 assert_return(!event_pid_changed(e), -ECHILD);
3569
3570 return e->epoll_fd;
3571 }
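
/* Editor's note: illustrative sketch (not part of sd-event.c). The epoll fd returned above
 * allows an sd-event loop to be embedded into a foreign main loop: wait for the fd to
 * become readable there, then run one non-blocking iteration. Assumes <poll.h>. */
static int example_drive_from_foreign_loop(sd_event *e) {
        struct pollfd p = {
                .fd = sd_event_get_fd(e),
                .events = POLLIN,
        };

        if (p.fd < 0)
                return p.fd;

        if (poll(&p, 1, -1) < 0)
                return -errno;

        /* Timeout 0: the fd is readable, so there is already work to dispatch */
        return sd_event_run(e, 0);
}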
3572
3573 _public_ int sd_event_get_state(sd_event *e) {
3574 assert_return(e, -EINVAL);
3575 assert_return(e = event_resolve(e), -ENOPKG);
3576 assert_return(!event_pid_changed(e), -ECHILD);
3577
3578 return e->state;
3579 }
3580
3581 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3582 assert_return(e, -EINVAL);
3583 assert_return(e = event_resolve(e), -ENOPKG);
3584 assert_return(code, -EINVAL);
3585 assert_return(!event_pid_changed(e), -ECHILD);
3586
3587 if (!e->exit_requested)
3588 return -ENODATA;
3589
3590 *code = e->exit_code;
3591 return 0;
3592 }
3593
3594 _public_ int sd_event_exit(sd_event *e, int code) {
3595 assert_return(e, -EINVAL);
3596 assert_return(e = event_resolve(e), -ENOPKG);
3597 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3598 assert_return(!event_pid_changed(e), -ECHILD);
3599
3600 e->exit_requested = true;
3601 e->exit_code = code;
3602
3603 return 0;
3604 }
3605
3606 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3607 assert_return(e, -EINVAL);
3608 assert_return(e = event_resolve(e), -ENOPKG);
3609 assert_return(usec, -EINVAL);
3610 assert_return(!event_pid_changed(e), -ECHILD);
3611
3612 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3613 return -EOPNOTSUPP;
3614
3615 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3616 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3617 * the purpose of getting the time this doesn't matter. */
3618 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3619 return -EOPNOTSUPP;
3620
3621 if (!triple_timestamp_is_set(&e->timestamp)) {
3622 /* Implicitly fall back to now() if we never ran
3623 * before and thus have no cached time. */
3624 *usec = now(clock);
3625 return 1;
3626 }
3627
3628 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3629 return 0;
3630 }
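
/* Editor's note: illustrative sketch (not part of sd-event.c) of the intended use of
 * sd_event_now(): timer sources take absolute deadlines, so relative timeouts are computed
 * against the loop's cached timestamp instead of a fresh clock_gettime(). The handler
 * parameter is whatever callback the caller wants to run when the timer fires. */
static int example_arm_relative_timer(sd_event *e, uint64_t delay_usec, sd_event_time_handler_t handler, void *userdata) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        /* NULL source slot: the one-shot timer floats and is cleaned up with the loop;
         * the 250ms accuracy gives sleep_between() room to coalesce wakeups. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 now_usec + delay_usec,
                                 250 * USEC_PER_MSEC,
                                 handler, userdata);
}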
3631
3632 _public_ int sd_event_default(sd_event **ret) {
3633 sd_event *e = NULL;
3634 int r;
3635
3636 if (!ret)
3637 return !!default_event;
3638
3639 if (default_event) {
3640 *ret = sd_event_ref(default_event);
3641 return 0;
3642 }
3643
3644 r = sd_event_new(&e);
3645 if (r < 0)
3646 return r;
3647
3648 e->default_event_ptr = &default_event;
3649 e->tid = gettid();
3650 default_event = e;
3651
3652 *ret = e;
3653 return 1;
3654 }
3655
3656 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3657 assert_return(e, -EINVAL);
3658 assert_return(e = event_resolve(e), -ENOPKG);
3659 assert_return(tid, -EINVAL);
3660 assert_return(!event_pid_changed(e), -ECHILD);
3661
3662 if (e->tid != 0) {
3663 *tid = e->tid;
3664 return 0;
3665 }
3666
3667 return -ENXIO;
3668 }
3669
3670 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3671 int r;
3672
3673 assert_return(e, -EINVAL);
3674 assert_return(e = event_resolve(e), -ENOPKG);
3675 assert_return(!event_pid_changed(e), -ECHILD);
3676
3677 if (e->watchdog == !!b)
3678 return e->watchdog;
3679
3680 if (b) {
3681 struct epoll_event ev;
3682
3683 r = sd_watchdog_enabled(false, &e->watchdog_period);
3684 if (r <= 0)
3685 return r;
3686
3687 /* Issue first ping immediately */
3688 sd_notify(false, "WATCHDOG=1");
3689 e->watchdog_last = now(CLOCK_MONOTONIC);
3690
3691 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3692 if (e->watchdog_fd < 0)
3693 return -errno;
3694
3695 r = arm_watchdog(e);
3696 if (r < 0)
3697 goto fail;
3698
3699 ev = (struct epoll_event) {
3700 .events = EPOLLIN,
3701 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3702 };
3703
3704 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3705 if (r < 0) {
3706 r = -errno;
3707 goto fail;
3708 }
3709
3710 } else {
3711 if (e->watchdog_fd >= 0) {
3712 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3713 e->watchdog_fd = safe_close(e->watchdog_fd);
3714 }
3715 }
3716
3717 e->watchdog = !!b;
3718 return e->watchdog;
3719
3720 fail:
3721 e->watchdog_fd = safe_close(e->watchdog_fd);
3722 return r;
3723 }
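
/* Editor's note: illustrative sketch (not part of sd-event.c). A service started with
 * WatchdogSec= only needs to opt in once; afterwards process_watchdog() above pings the
 * service manager automatically at a fraction of the configured interval. */
static int example_enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;
        if (r == 0)
                /* No $WATCHDOG_USEC in the environment: nothing to do */
                return 0;

        return 1; /* watchdog keep-alive is now handled by the event loop */
}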
3724
3725 _public_ int sd_event_get_watchdog(sd_event *e) {
3726 assert_return(e, -EINVAL);
3727 assert_return(e = event_resolve(e), -ENOPKG);
3728 assert_return(!event_pid_changed(e), -ECHILD);
3729
3730 return e->watchdog;
3731 }
3732
3733 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3734 assert_return(e, -EINVAL);
3735 assert_return(e = event_resolve(e), -ENOPKG);
3736 assert_return(!event_pid_changed(e), -ECHILD);
3737
3738 *ret = e->iteration;
3739 return 0;
3740 }
3741
3742 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3743 assert_return(s, -EINVAL);
3744
3745 s->destroy_callback = callback;
3746 return 0;
3747 }
3748
3749 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3750 assert_return(s, -EINVAL);
3751
3752 if (ret)
3753 *ret = s->destroy_callback;
3754
3755 return !!s->destroy_callback;
3756 }
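
/* Editor's note: illustrative sketch (not part of sd-event.c). A destroy callback ties the
 * lifetime of heap-allocated userdata to the event source, so it is released exactly once,
 * when the source itself goes away. */
static void example_free_userdata(void *userdata) {
        free(userdata);
}

static int example_add_defer_with_context(sd_event *e, sd_event_handler_t handler, sd_event_source **ret) {
        _cleanup_free_ char *context = NULL;
        sd_event_source *s = NULL;
        int r;

        context = strdup("per-source state");
        if (!context)
                return -ENOMEM;

        r = sd_event_add_defer(e, &s, handler, context);
        if (r < 0)
                return r;

        r = sd_event_source_set_destroy_callback(s, example_free_userdata);
        if (r < 0) {
                sd_event_source_unref(s);
                return r;
        }

        TAKE_PTR(context); /* from now on freed by the destroy callback */
        *ret = s;
        return 0;
}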