src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "fd-util.h"
13 #include "fs-util.h"
14 #include "hashmap.h"
15 #include "list.h"
16 #include "macro.h"
17 #include "missing.h"
18 #include "prioq.h"
19 #include "process-util.h"
20 #include "set.h"
21 #include "signal-util.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "time-util.h"
25 #include "util.h"
26
27 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
28
29 typedef enum EventSourceType {
30 SOURCE_IO,
31 SOURCE_TIME_REALTIME,
32 SOURCE_TIME_BOOTTIME,
33 SOURCE_TIME_MONOTONIC,
34 SOURCE_TIME_REALTIME_ALARM,
35 SOURCE_TIME_BOOTTIME_ALARM,
36 SOURCE_SIGNAL,
37 SOURCE_CHILD,
38 SOURCE_DEFER,
39 SOURCE_POST,
40 SOURCE_EXIT,
41 SOURCE_WATCHDOG,
42 SOURCE_INOTIFY,
43 _SOURCE_EVENT_SOURCE_TYPE_MAX,
44 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
45 } EventSourceType;
46
47 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
48 [SOURCE_IO] = "io",
49 [SOURCE_TIME_REALTIME] = "realtime",
50 [SOURCE_TIME_BOOTTIME] = "boottime",
51 [SOURCE_TIME_MONOTONIC] = "monotonic",
52 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
53 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
54 [SOURCE_SIGNAL] = "signal",
55 [SOURCE_CHILD] = "child",
56 [SOURCE_DEFER] = "defer",
57 [SOURCE_POST] = "post",
58 [SOURCE_EXIT] = "exit",
59 [SOURCE_WATCHDOG] = "watchdog",
60 [SOURCE_INOTIFY] = "inotify",
61 };
62
63 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
64
65 /* All objects we use in epoll events start with this value, so that
66 * we know how to dispatch it */
67 typedef enum WakeupType {
68 WAKEUP_NONE,
69 WAKEUP_EVENT_SOURCE,
70 WAKEUP_CLOCK_DATA,
71 WAKEUP_SIGNAL_DATA,
72 WAKEUP_INOTIFY_DATA,
73 _WAKEUP_TYPE_MAX,
74 _WAKEUP_TYPE_INVALID = -1,
75 } WakeupType;
76
77 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
78
79 struct inode_data;
80
81 struct sd_event_source {
82 WakeupType wakeup;
83
84 unsigned n_ref;
85
86 sd_event *event;
87 void *userdata;
88 sd_event_handler_t prepare;
89
90 char *description;
91
92 EventSourceType type:5;
93 signed int enabled:3;
94 bool pending:1;
95 bool dispatching:1;
96 bool floating:1;
97
98 int64_t priority;
99 unsigned pending_index;
100 unsigned prepare_index;
101 uint64_t pending_iteration;
102 uint64_t prepare_iteration;
103
104 sd_event_destroy_t destroy_callback;
105
106 LIST_FIELDS(sd_event_source, sources);
107
108 union {
109 struct {
110 sd_event_io_handler_t callback;
111 int fd;
112 uint32_t events;
113 uint32_t revents;
114 bool registered:1;
115 bool owned:1;
116 } io;
117 struct {
118 sd_event_time_handler_t callback;
119 usec_t next, accuracy;
120 unsigned earliest_index;
121 unsigned latest_index;
122 } time;
123 struct {
124 sd_event_signal_handler_t callback;
125 struct signalfd_siginfo siginfo;
126 int sig;
127 } signal;
128 struct {
129 sd_event_child_handler_t callback;
130 siginfo_t siginfo;
131 pid_t pid;
132 int options;
133 } child;
134 struct {
135 sd_event_handler_t callback;
136 } defer;
137 struct {
138 sd_event_handler_t callback;
139 } post;
140 struct {
141 sd_event_handler_t callback;
142 unsigned prioq_index;
143 } exit;
144 struct {
145 sd_event_inotify_handler_t callback;
146 uint32_t mask;
147 struct inode_data *inode_data;
148 LIST_FIELDS(sd_event_source, by_inode_data);
149 } inotify;
150 };
151 };
152
153 struct clock_data {
154 WakeupType wakeup;
155 int fd;
156
157 /* For all clocks we maintain two priority queues each, one
158 * ordered by the earliest times the events may be
159 * dispatched, and one ordered by the latest times they must
160 * have been dispatched. The range between the top entries in
161 * the two prioqs is the time window we can freely schedule
162 * wakeups in */
163
164 Prioq *earliest;
165 Prioq *latest;
166 usec_t next;
167
168 bool needs_rearm:1;
169 };
170
171 struct signal_data {
172 WakeupType wakeup;
173
174 /* For each priority we maintain one signal fd, so that we
175 * only have to dequeue a single event per priority at a
176 * time. */
177
178 int fd;
179 int64_t priority;
180 sigset_t sigset;
181 sd_event_source *current;
182 };
183
184 /* A structure listing all event sources currently watching a specific inode */
185 struct inode_data {
186 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
187 ino_t ino;
188 dev_t dev;
189
190 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that the
191 * priority can still be changed until then: changing the priority means adding a watch descriptor to the
192 * inotify object of the new priority, which is only possible while we still have a handle to the
193 * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see
194 * below) of the sd-event object, so that it is efficient to close them all before entering the next
195 * event loop iteration. */
196 int fd;
197
198 /* The inotify "watch descriptor" */
199 int wd;
200
201 /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
202 * most recently been set on the watch descriptor. */
203 uint32_t combined_mask;
204
205 /* All event sources subscribed to this inode */
206 LIST_HEAD(sd_event_source, event_sources);
207
208 /* The inotify object we watch this inode with */
209 struct inotify_data *inotify_data;
210
211 /* A linked list of all inode data objects with fds to close (see above) */
212 LIST_FIELDS(struct inode_data, to_close);
213 };
214
215 /* A structure encapsulating an inotify fd */
216 struct inotify_data {
217 WakeupType wakeup;
218
219 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
220 * a time */
221
222 int fd;
223 int64_t priority;
224
225 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
226 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
227
228 /* The buffer we read inotify events into */
229 union inotify_event_buffer buffer;
230 size_t buffer_filled; /* fill level of the buffer */
231
232 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
233 * inotify fd as long as there are still pending events on the inotify (because we have no strategy for queuing
234 * the events locally if they can't be coalesced). */
235 unsigned n_pending;
236
237 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
238 * to make it efficient to figure out what inotify objects to process data on next. */
239 LIST_FIELDS(struct inotify_data, buffered);
240 };
241
242 struct sd_event {
243 unsigned n_ref;
244
245 int epoll_fd;
246 int watchdog_fd;
247
248 Prioq *pending;
249 Prioq *prepare;
250
251 /* timerfd_create() only supports these five clocks so far. We
252 * can add support for more clocks when the kernel learns to
253 * deal with them, too. */
254 struct clock_data realtime;
255 struct clock_data boottime;
256 struct clock_data monotonic;
257 struct clock_data realtime_alarm;
258 struct clock_data boottime_alarm;
259
260 usec_t perturb;
261
262 sd_event_source **signal_sources; /* indexed by signal number */
263 Hashmap *signal_data; /* indexed by priority */
264
265 Hashmap *child_sources;
266 unsigned n_enabled_child_sources;
267
268 Set *post_sources;
269
270 Prioq *exit;
271
272 Hashmap *inotify_data; /* indexed by priority */
273
274 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
275 LIST_HEAD(struct inode_data, inode_data_to_close);
276
277 /* A list of inotify objects that already have events buffered which aren't processed yet */
278 LIST_HEAD(struct inotify_data, inotify_data_buffered);
279
280 pid_t original_pid;
281
282 uint64_t iteration;
283 triple_timestamp timestamp;
284 int state;
285
286 bool exit_requested:1;
287 bool need_process_child:1;
288 bool watchdog:1;
289 bool profile_delays:1;
290
291 int exit_code;
292
293 pid_t tid;
294 sd_event **default_event_ptr;
295
296 usec_t watchdog_last, watchdog_period;
297
298 unsigned n_sources;
299
300 LIST_HEAD(sd_event_source, sources);
301
302 usec_t last_run, last_log;
303 unsigned delays[sizeof(usec_t) * 8];
304 };
305
306 static thread_local sd_event *default_event = NULL;
307
308 static void source_disconnect(sd_event_source *s);
309 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
310
311 static sd_event *event_resolve(sd_event *e) {
312 return e == SD_EVENT_DEFAULT ? default_event : e;
313 }
314
315 static int pending_prioq_compare(const void *a, const void *b) {
316 const sd_event_source *x = a, *y = b;
317
318 assert(x->pending);
319 assert(y->pending);
320
321 /* Enabled ones first */
322 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
323 return -1;
324 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
325 return 1;
326
327 /* Lower priority values first */
328 if (x->priority < y->priority)
329 return -1;
330 if (x->priority > y->priority)
331 return 1;
332
333 /* Older entries first */
334 if (x->pending_iteration < y->pending_iteration)
335 return -1;
336 if (x->pending_iteration > y->pending_iteration)
337 return 1;
338
339 return 0;
340 }
341
342 static int prepare_prioq_compare(const void *a, const void *b) {
343 const sd_event_source *x = a, *y = b;
344
345 assert(x->prepare);
346 assert(y->prepare);
347
348 /* Enabled ones first */
349 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
350 return -1;
351 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
352 return 1;
353
354 /* Move most recently prepared ones last, so that we can stop
355 * preparing as soon as we hit one that has already been
356 * prepared in the current iteration */
357 if (x->prepare_iteration < y->prepare_iteration)
358 return -1;
359 if (x->prepare_iteration > y->prepare_iteration)
360 return 1;
361
362 /* Lower priority values first */
363 if (x->priority < y->priority)
364 return -1;
365 if (x->priority > y->priority)
366 return 1;
367
368 return 0;
369 }
370
371 static int earliest_time_prioq_compare(const void *a, const void *b) {
372 const sd_event_source *x = a, *y = b;
373
374 assert(EVENT_SOURCE_IS_TIME(x->type));
375 assert(x->type == y->type);
376
377 /* Enabled ones first */
378 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
379 return -1;
380 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
381 return 1;
382
383 /* Move the pending ones to the end */
384 if (!x->pending && y->pending)
385 return -1;
386 if (x->pending && !y->pending)
387 return 1;
388
389 /* Order by time */
390 if (x->time.next < y->time.next)
391 return -1;
392 if (x->time.next > y->time.next)
393 return 1;
394
395 return 0;
396 }
397
398 static usec_t time_event_source_latest(const sd_event_source *s) {
399 return usec_add(s->time.next, s->time.accuracy);
400 }
401
402 static int latest_time_prioq_compare(const void *a, const void *b) {
403 const sd_event_source *x = a, *y = b;
404
405 assert(EVENT_SOURCE_IS_TIME(x->type));
406 assert(x->type == y->type);
407
408 /* Enabled ones first */
409 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
410 return -1;
411 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
412 return 1;
413
414 /* Move the pending ones to the end */
415 if (!x->pending && y->pending)
416 return -1;
417 if (x->pending && !y->pending)
418 return 1;
419
420 /* Order by time */
421 if (time_event_source_latest(x) < time_event_source_latest(y))
422 return -1;
423 if (time_event_source_latest(x) > time_event_source_latest(y))
424 return 1;
425
426 return 0;
427 }
428
429 static int exit_prioq_compare(const void *a, const void *b) {
430 const sd_event_source *x = a, *y = b;
431
432 assert(x->type == SOURCE_EXIT);
433 assert(y->type == SOURCE_EXIT);
434
435 /* Enabled ones first */
436 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
437 return -1;
438 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
439 return 1;
440
441 /* Lower priority values first */
442 if (x->priority < y->priority)
443 return -1;
444 if (x->priority > y->priority)
445 return 1;
446
447 return 0;
448 }
449
450 static void free_clock_data(struct clock_data *d) {
451 assert(d);
452 assert(d->wakeup == WAKEUP_CLOCK_DATA);
453
454 safe_close(d->fd);
455 prioq_free(d->earliest);
456 prioq_free(d->latest);
457 }
458
459 static sd_event *event_free(sd_event *e) {
460 sd_event_source *s;
461
462 assert(e);
463
464 while ((s = e->sources)) {
465 assert(s->floating);
466 source_disconnect(s);
467 sd_event_source_unref(s);
468 }
469
470 assert(e->n_sources == 0);
471
472 if (e->default_event_ptr)
473 *(e->default_event_ptr) = NULL;
474
475 safe_close(e->epoll_fd);
476 safe_close(e->watchdog_fd);
477
478 free_clock_data(&e->realtime);
479 free_clock_data(&e->boottime);
480 free_clock_data(&e->monotonic);
481 free_clock_data(&e->realtime_alarm);
482 free_clock_data(&e->boottime_alarm);
483
484 prioq_free(e->pending);
485 prioq_free(e->prepare);
486 prioq_free(e->exit);
487
488 free(e->signal_sources);
489 hashmap_free(e->signal_data);
490
491 hashmap_free(e->inotify_data);
492
493 hashmap_free(e->child_sources);
494 set_free(e->post_sources);
495
496 return mfree(e);
497 }
498
499 _public_ int sd_event_new(sd_event** ret) {
500 sd_event *e;
501 int r;
502
503 assert_return(ret, -EINVAL);
504
505 e = new(sd_event, 1);
506 if (!e)
507 return -ENOMEM;
508
509 *e = (sd_event) {
510 .n_ref = 1,
511 .epoll_fd = -1,
512 .watchdog_fd = -1,
513 .realtime.wakeup = WAKEUP_CLOCK_DATA,
514 .realtime.fd = -1,
515 .realtime.next = USEC_INFINITY,
516 .boottime.wakeup = WAKEUP_CLOCK_DATA,
517 .boottime.fd = -1,
518 .boottime.next = USEC_INFINITY,
519 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
520 .monotonic.fd = -1,
521 .monotonic.next = USEC_INFINITY,
522 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
523 .realtime_alarm.fd = -1,
524 .realtime_alarm.next = USEC_INFINITY,
525 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
526 .boottime_alarm.fd = -1,
527 .boottime_alarm.next = USEC_INFINITY,
528 .perturb = USEC_INFINITY,
529 .original_pid = getpid_cached(),
530 };
531
532 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
533 if (r < 0)
534 goto fail;
535
536 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
537 if (e->epoll_fd < 0) {
538 r = -errno;
539 goto fail;
540 }
541
542 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
543
544 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
545 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
546 e->profile_delays = true;
547 }
548
549 *ret = e;
550 return 0;
551
552 fail:
553 event_free(e);
554 return r;
555 }
556
557 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
558
559 static bool event_pid_changed(sd_event *e) {
560 assert(e);
561
562 /* We don't support people creating an event loop and keeping
563 * it around over a fork(). Let's complain. */
564
565 return e->original_pid != getpid_cached();
566 }
567
568 static void source_io_unregister(sd_event_source *s) {
569 int r;
570
571 assert(s);
572 assert(s->type == SOURCE_IO);
573
574 if (event_pid_changed(s->event))
575 return;
576
577 if (!s->io.registered)
578 return;
579
580 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
581 if (r < 0)
582 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
583 strna(s->description), event_source_type_to_string(s->type));
584
585 s->io.registered = false;
586 }
587
588 static int source_io_register(
589 sd_event_source *s,
590 int enabled,
591 uint32_t events) {
592
593 struct epoll_event ev;
594 int r;
595
596 assert(s);
597 assert(s->type == SOURCE_IO);
598 assert(enabled != SD_EVENT_OFF);
599
600 ev = (struct epoll_event) {
601 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
602 .data.ptr = s,
603 };
604
605 if (s->io.registered)
606 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
607 else
608 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
609 if (r < 0)
610 return -errno;
611
612 s->io.registered = true;
613
614 return 0;
615 }
616
617 static clockid_t event_source_type_to_clock(EventSourceType t) {
618
619 switch (t) {
620
621 case SOURCE_TIME_REALTIME:
622 return CLOCK_REALTIME;
623
624 case SOURCE_TIME_BOOTTIME:
625 return CLOCK_BOOTTIME;
626
627 case SOURCE_TIME_MONOTONIC:
628 return CLOCK_MONOTONIC;
629
630 case SOURCE_TIME_REALTIME_ALARM:
631 return CLOCK_REALTIME_ALARM;
632
633 case SOURCE_TIME_BOOTTIME_ALARM:
634 return CLOCK_BOOTTIME_ALARM;
635
636 default:
637 return (clockid_t) -1;
638 }
639 }
640
641 static EventSourceType clock_to_event_source_type(clockid_t clock) {
642
643 switch (clock) {
644
645 case CLOCK_REALTIME:
646 return SOURCE_TIME_REALTIME;
647
648 case CLOCK_BOOTTIME:
649 return SOURCE_TIME_BOOTTIME;
650
651 case CLOCK_MONOTONIC:
652 return SOURCE_TIME_MONOTONIC;
653
654 case CLOCK_REALTIME_ALARM:
655 return SOURCE_TIME_REALTIME_ALARM;
656
657 case CLOCK_BOOTTIME_ALARM:
658 return SOURCE_TIME_BOOTTIME_ALARM;
659
660 default:
661 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
662 }
663 }
664
665 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
666 assert(e);
667
668 switch (t) {
669
670 case SOURCE_TIME_REALTIME:
671 return &e->realtime;
672
673 case SOURCE_TIME_BOOTTIME:
674 return &e->boottime;
675
676 case SOURCE_TIME_MONOTONIC:
677 return &e->monotonic;
678
679 case SOURCE_TIME_REALTIME_ALARM:
680 return &e->realtime_alarm;
681
682 case SOURCE_TIME_BOOTTIME_ALARM:
683 return &e->boottime_alarm;
684
685 default:
686 return NULL;
687 }
688 }
689
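/* Looks up (or allocates) the signal_data object for the priority the specified signal is registered at,
 * adds the signal to its mask, and (re)creates the signalfd and its epoll registration as necessary. */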
690 static int event_make_signal_data(
691 sd_event *e,
692 int sig,
693 struct signal_data **ret) {
694
695 struct epoll_event ev;
696 struct signal_data *d;
697 bool added = false;
698 sigset_t ss_copy;
699 int64_t priority;
700 int r;
701
702 assert(e);
703
704 if (event_pid_changed(e))
705 return -ECHILD;
706
707 if (e->signal_sources && e->signal_sources[sig])
708 priority = e->signal_sources[sig]->priority;
709 else
710 priority = SD_EVENT_PRIORITY_NORMAL;
711
712 d = hashmap_get(e->signal_data, &priority);
713 if (d) {
714 if (sigismember(&d->sigset, sig) > 0) {
715 if (ret)
716 *ret = d;
717 return 0;
718 }
719 } else {
720 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
721 if (r < 0)
722 return r;
723
724 d = new(struct signal_data, 1);
725 if (!d)
726 return -ENOMEM;
727
728 *d = (struct signal_data) {
729 .wakeup = WAKEUP_SIGNAL_DATA,
730 .fd = -1,
731 .priority = priority,
732 };
733
734 r = hashmap_put(e->signal_data, &d->priority, d);
735 if (r < 0) {
736 free(d);
737 return r;
738 }
739
740 added = true;
741 }
742
743 ss_copy = d->sigset;
744 assert_se(sigaddset(&ss_copy, sig) >= 0);
745
746 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
747 if (r < 0) {
748 r = -errno;
749 goto fail;
750 }
751
752 d->sigset = ss_copy;
753
754 if (d->fd >= 0) {
755 if (ret)
756 *ret = d;
757 return 0;
758 }
759
760 d->fd = fd_move_above_stdio(r);
761
762 ev = (struct epoll_event) {
763 .events = EPOLLIN,
764 .data.ptr = d,
765 };
766
767 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
768 if (r < 0) {
769 r = -errno;
770 goto fail;
771 }
772
773 if (ret)
774 *ret = d;
775
776 return 0;
777
778 fail:
779 if (added) {
780 d->fd = safe_close(d->fd);
781 hashmap_remove(e->signal_data, &d->priority);
782 free(d);
783 }
784
785 return r;
786 }
787
788 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
789 assert(e);
790 assert(d);
791
792 /* Turns off the specified signal in the signal data
793 * object. If the signal mask of the object becomes empty that
794 * way, the object is removed. */
795
796 if (sigismember(&d->sigset, sig) == 0)
797 return;
798
799 assert_se(sigdelset(&d->sigset, sig) >= 0);
800
801 if (sigisemptyset(&d->sigset)) {
802
803 /* If the mask is all-zero we can get rid of the structure */
804 hashmap_remove(e->signal_data, &d->priority);
805 safe_close(d->fd);
806 free(d);
807 return;
808 }
809
810 assert(d->fd >= 0);
811
812 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
813 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
814 }
815
816 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
817 struct signal_data *d;
818 static const int64_t zero_priority = 0;
819
820 assert(e);
821
822 /* Rechecks if the specified signal is still something we are
823 * interested in. If not, we'll unmask it, and possibly drop
824 * the signalfd for it. */
825
826 if (sig == SIGCHLD &&
827 e->n_enabled_child_sources > 0)
828 return;
829
830 if (e->signal_sources &&
831 e->signal_sources[sig] &&
832 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
833 return;
834
835 /*
836 * The specified signal might be enabled in three different queues:
837 *
838 * 1) the one that belongs to the priority passed (if it is non-NULL)
839 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
840 * 3) the 0 priority (to cover the SIGCHLD case)
841 *
842 * Hence, let's remove it from all three here.
843 */
844
845 if (priority) {
846 d = hashmap_get(e->signal_data, priority);
847 if (d)
848 event_unmask_signal_data(e, d, sig);
849 }
850
851 if (e->signal_sources && e->signal_sources[sig]) {
852 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
853 if (d)
854 event_unmask_signal_data(e, d, sig);
855 }
856
857 d = hashmap_get(e->signal_data, &zero_priority);
858 if (d)
859 event_unmask_signal_data(e, d, sig);
860 }
861
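/* Detaches an event source from its event loop: removes it from the per-type data structures and from the
 * pending/prepare queues, but does not free the source object itself. */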
862 static void source_disconnect(sd_event_source *s) {
863 sd_event *event;
864
865 assert(s);
866
867 if (!s->event)
868 return;
869
870 assert(s->event->n_sources > 0);
871
872 switch (s->type) {
873
874 case SOURCE_IO:
875 if (s->io.fd >= 0)
876 source_io_unregister(s);
877
878 break;
879
880 case SOURCE_TIME_REALTIME:
881 case SOURCE_TIME_BOOTTIME:
882 case SOURCE_TIME_MONOTONIC:
883 case SOURCE_TIME_REALTIME_ALARM:
884 case SOURCE_TIME_BOOTTIME_ALARM: {
885 struct clock_data *d;
886
887 d = event_get_clock_data(s->event, s->type);
888 assert(d);
889
890 prioq_remove(d->earliest, s, &s->time.earliest_index);
891 prioq_remove(d->latest, s, &s->time.latest_index);
892 d->needs_rearm = true;
893 break;
894 }
895
896 case SOURCE_SIGNAL:
897 if (s->signal.sig > 0) {
898
899 if (s->event->signal_sources)
900 s->event->signal_sources[s->signal.sig] = NULL;
901
902 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
903 }
904
905 break;
906
907 case SOURCE_CHILD:
908 if (s->child.pid > 0) {
909 if (s->enabled != SD_EVENT_OFF) {
910 assert(s->event->n_enabled_child_sources > 0);
911 s->event->n_enabled_child_sources--;
912 }
913
914 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
915 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
916 }
917
918 break;
919
920 case SOURCE_DEFER:
921 /* nothing */
922 break;
923
924 case SOURCE_POST:
925 set_remove(s->event->post_sources, s);
926 break;
927
928 case SOURCE_EXIT:
929 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
930 break;
931
932 case SOURCE_INOTIFY: {
933 struct inode_data *inode_data;
934
935 inode_data = s->inotify.inode_data;
936 if (inode_data) {
937 struct inotify_data *inotify_data;
938 assert_se(inotify_data = inode_data->inotify_data);
939
940 /* Detach this event source from the inode object */
941 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
942 s->inotify.inode_data = NULL;
943
944 if (s->pending) {
945 assert(inotify_data->n_pending > 0);
946 inotify_data->n_pending--;
947 }
948
949 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
950 * continues to be watched. That's because inotify doesn't really have an API for that: we
951 * can only change watch masks with access to the original inode either by fd or by path. But
952 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
953 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
954 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
955 * there), but given the need for open_by_handle_at() which is privileged and not universally
956 * available this would be quite an incomplete solution. Hence we go the other way, leave the
957 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
958 * anymore after reception. Yes, this sucks, but … Linux … */
959
960 /* Maybe release the inode data (and its inotify) */
961 event_gc_inode_data(s->event, inode_data);
962 }
963
964 break;
965 }
966
967 default:
968 assert_not_reached("Wut? I shouldn't exist.");
969 }
970
971 if (s->pending)
972 prioq_remove(s->event->pending, s, &s->pending_index);
973
974 if (s->prepare)
975 prioq_remove(s->event->prepare, s, &s->prepare_index);
976
977 event = s->event;
978
979 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
980 s->event = NULL;
981 LIST_REMOVE(sources, event->sources, s);
982 event->n_sources--;
983
984 if (!s->floating)
985 sd_event_unref(event);
986 }
987
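/* Fully destroys an event source: disconnects it from the loop, closes the fd if we own it, invokes the
 * destroy callback and releases the memory. */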
988 static void source_free(sd_event_source *s) {
989 assert(s);
990
991 source_disconnect(s);
992
993 if (s->type == SOURCE_IO && s->io.owned)
994 s->io.fd = safe_close(s->io.fd);
995
996 if (s->destroy_callback)
997 s->destroy_callback(s->userdata);
998
999 free(s->description);
1000 free(s);
1001 }
1002 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
1003
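/* Marks (or unmarks) an event source as pending and keeps the auxiliary per-type state in sync: the
 * per-clock prioqs for time sources, the "current" pointer of the signal_data object for signal sources,
 * and the n_pending counter of the owning inotify object for inotify sources. */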
1004 static int source_set_pending(sd_event_source *s, bool b) {
1005 int r;
1006
1007 assert(s);
1008 assert(s->type != SOURCE_EXIT);
1009
1010 if (s->pending == b)
1011 return 0;
1012
1013 s->pending = b;
1014
1015 if (b) {
1016 s->pending_iteration = s->event->iteration;
1017
1018 r = prioq_put(s->event->pending, s, &s->pending_index);
1019 if (r < 0) {
1020 s->pending = false;
1021 return r;
1022 }
1023 } else
1024 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1025
1026 if (EVENT_SOURCE_IS_TIME(s->type)) {
1027 struct clock_data *d;
1028
1029 d = event_get_clock_data(s->event, s->type);
1030 assert(d);
1031
1032 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1033 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1034 d->needs_rearm = true;
1035 }
1036
1037 if (s->type == SOURCE_SIGNAL && !b) {
1038 struct signal_data *d;
1039
1040 d = hashmap_get(s->event->signal_data, &s->priority);
1041 if (d && d->current == s)
1042 d->current = NULL;
1043 }
1044
1045 if (s->type == SOURCE_INOTIFY) {
1046
1047 assert(s->inotify.inode_data);
1048 assert(s->inotify.inode_data->inotify_data);
1049
1050 if (b)
1051 s->inotify.inode_data->inotify_data->n_pending++;
1052 else {
1053 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1054 s->inotify.inode_data->inotify_data->n_pending--;
1055 }
1056 }
1057
1058 return 0;
1059 }
1060
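/* Allocates a new event source of the specified type and links it into the event loop. A "floating" source
 * is owned by the event loop itself and hence does not take a reference on it. */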
1061 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1062 sd_event_source *s;
1063
1064 assert(e);
1065
1066 s = new(sd_event_source, 1);
1067 if (!s)
1068 return NULL;
1069
1070 *s = (struct sd_event_source) {
1071 .n_ref = 1,
1072 .event = e,
1073 .floating = floating,
1074 .type = type,
1075 .pending_index = PRIOQ_IDX_NULL,
1076 .prepare_index = PRIOQ_IDX_NULL,
1077 };
1078
1079 if (!floating)
1080 sd_event_ref(e);
1081
1082 LIST_PREPEND(sources, e->sources, s);
1083 e->n_sources++;
1084
1085 return s;
1086 }
1087
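/* Illustrative usage sketch (not part of this file; the handler and 'some_fd' are placeholders): watch an
 * fd for input on the default event loop:
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             (void) read(fd, buf, sizeof(buf));
 *             return 0;
 *     }
 *
 *     sd_event *e = NULL;
 *     assert_se(sd_event_default(&e) >= 0);
 *     assert_se(sd_event_add_io(e, NULL, some_fd, EPOLLIN, on_io, NULL) >= 0);
 *     assert_se(sd_event_loop(e) >= 0);
 *
 * Passing NULL for the source pointer makes the source "floating", i.e. owned by the event loop (see
 * source_new() above). */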
1088 _public_ int sd_event_add_io(
1089 sd_event *e,
1090 sd_event_source **ret,
1091 int fd,
1092 uint32_t events,
1093 sd_event_io_handler_t callback,
1094 void *userdata) {
1095
1096 _cleanup_(source_freep) sd_event_source *s = NULL;
1097 int r;
1098
1099 assert_return(e, -EINVAL);
1100 assert_return(e = event_resolve(e), -ENOPKG);
1101 assert_return(fd >= 0, -EBADF);
1102 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1103 assert_return(callback, -EINVAL);
1104 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1105 assert_return(!event_pid_changed(e), -ECHILD);
1106
1107 s = source_new(e, !ret, SOURCE_IO);
1108 if (!s)
1109 return -ENOMEM;
1110
1111 s->wakeup = WAKEUP_EVENT_SOURCE;
1112 s->io.fd = fd;
1113 s->io.events = events;
1114 s->io.callback = callback;
1115 s->userdata = userdata;
1116 s->enabled = SD_EVENT_ON;
1117
1118 r = source_io_register(s, s->enabled, events);
1119 if (r < 0)
1120 return r;
1121
1122 if (ret)
1123 *ret = s;
1124 TAKE_PTR(s);
1125
1126 return 0;
1127 }
1128
1129 static void initialize_perturb(sd_event *e) {
1130 sd_id128_t bootid = {};
1131
1132 /* When we sleep for longer, we try to realign the wakeup to
1133 the same time within each minute/second/250ms, so that
1134 events all across the system can be coalesced into a single
1135 CPU wakeup. However, let's take some system-specific
1136 randomness for this value, so that in a network of systems
1137 with synced clocks timer events are distributed a
1138 bit. Here, we calculate a perturbation usec offset from the
1139 boot ID. */
1140
1141 if (_likely_(e->perturb != USEC_INFINITY))
1142 return;
1143
1144 if (sd_id128_get_boot(&bootid) >= 0)
1145 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1146 }
1147
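/* Lazily creates the timerfd for the specified clock and registers it with the epoll instance. */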
1148 static int event_setup_timer_fd(
1149 sd_event *e,
1150 struct clock_data *d,
1151 clockid_t clock) {
1152
1153 struct epoll_event ev;
1154 int r, fd;
1155
1156 assert(e);
1157 assert(d);
1158
1159 if (_likely_(d->fd >= 0))
1160 return 0;
1161
1162 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1163 if (fd < 0)
1164 return -errno;
1165
1166 fd = fd_move_above_stdio(fd);
1167
1168 ev = (struct epoll_event) {
1169 .events = EPOLLIN,
1170 .data.ptr = d,
1171 };
1172
1173 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1174 if (r < 0) {
1175 safe_close(fd);
1176 return -errno;
1177 }
1178
1179 d->fd = fd;
1180 return 0;
1181 }
1182
1183 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1184 assert(s);
1185
1186 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1187 }
1188
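/* Illustrative usage sketch (not part of this file; given an sd_event object 'e', the handler name is a
 * placeholder): a one-shot timer firing roughly five seconds from now on the monotonic clock, with the
 * default accuracy:
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     assert_se(sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                 now(CLOCK_MONOTONIC) + 5 * USEC_PER_SEC, 0, on_time, NULL) >= 0);
 */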
1189 _public_ int sd_event_add_time(
1190 sd_event *e,
1191 sd_event_source **ret,
1192 clockid_t clock,
1193 uint64_t usec,
1194 uint64_t accuracy,
1195 sd_event_time_handler_t callback,
1196 void *userdata) {
1197
1198 EventSourceType type;
1199 _cleanup_(source_freep) sd_event_source *s = NULL;
1200 struct clock_data *d;
1201 int r;
1202
1203 assert_return(e, -EINVAL);
1204 assert_return(e = event_resolve(e), -ENOPKG);
1205 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1206 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1207 assert_return(!event_pid_changed(e), -ECHILD);
1208
1209 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1210 return -EOPNOTSUPP;
1211
1212 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1213 if (type < 0)
1214 return -EOPNOTSUPP;
1215
1216 if (!callback)
1217 callback = time_exit_callback;
1218
1219 d = event_get_clock_data(e, type);
1220 assert(d);
1221
1222 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1223 if (r < 0)
1224 return r;
1225
1226 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1227 if (r < 0)
1228 return r;
1229
1230 if (d->fd < 0) {
1231 r = event_setup_timer_fd(e, d, clock);
1232 if (r < 0)
1233 return r;
1234 }
1235
1236 s = source_new(e, !ret, type);
1237 if (!s)
1238 return -ENOMEM;
1239
1240 s->time.next = usec;
1241 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1242 s->time.callback = callback;
1243 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1244 s->userdata = userdata;
1245 s->enabled = SD_EVENT_ONESHOT;
1246
1247 d->needs_rearm = true;
1248
1249 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1250 if (r < 0)
1251 return r;
1252
1253 r = prioq_put(d->latest, s, &s->time.latest_index);
1254 if (r < 0)
1255 return r;
1256
1257 if (ret)
1258 *ret = s;
1259 TAKE_PTR(s);
1260
1261 return 0;
1262 }
1263
1264 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1265 assert(s);
1266
1267 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1268 }
1269
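/* Note: the signal must already be blocked (at least in the calling thread, see the pthread_sigmask()
 * check below), otherwise this returns -EBUSY. Callers typically block the signal in all threads first so
 * that it is only delivered via the signalfd managed here. */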
1270 _public_ int sd_event_add_signal(
1271 sd_event *e,
1272 sd_event_source **ret,
1273 int sig,
1274 sd_event_signal_handler_t callback,
1275 void *userdata) {
1276
1277 _cleanup_(source_freep) sd_event_source *s = NULL;
1278 struct signal_data *d;
1279 sigset_t ss;
1280 int r;
1281
1282 assert_return(e, -EINVAL);
1283 assert_return(e = event_resolve(e), -ENOPKG);
1284 assert_return(SIGNAL_VALID(sig), -EINVAL);
1285 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1286 assert_return(!event_pid_changed(e), -ECHILD);
1287
1288 if (!callback)
1289 callback = signal_exit_callback;
1290
1291 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1292 if (r != 0)
1293 return -r;
1294
1295 if (!sigismember(&ss, sig))
1296 return -EBUSY;
1297
1298 if (!e->signal_sources) {
1299 e->signal_sources = new0(sd_event_source*, _NSIG);
1300 if (!e->signal_sources)
1301 return -ENOMEM;
1302 } else if (e->signal_sources[sig])
1303 return -EBUSY;
1304
1305 s = source_new(e, !ret, SOURCE_SIGNAL);
1306 if (!s)
1307 return -ENOMEM;
1308
1309 s->signal.sig = sig;
1310 s->signal.callback = callback;
1311 s->userdata = userdata;
1312 s->enabled = SD_EVENT_ON;
1313
1314 e->signal_sources[sig] = s;
1315
1316 r = event_make_signal_data(e, sig, &d);
1317 if (r < 0)
1318 return r;
1319
1320 /* Use the signal name as description for the event source by default */
1321 (void) sd_event_source_set_description(s, signal_to_string(sig));
1322
1323 if (ret)
1324 *ret = s;
1325 TAKE_PTR(s);
1326
1327 return 0;
1328 }
1329
1330 _public_ int sd_event_add_child(
1331 sd_event *e,
1332 sd_event_source **ret,
1333 pid_t pid,
1334 int options,
1335 sd_event_child_handler_t callback,
1336 void *userdata) {
1337
1338 _cleanup_(source_freep) sd_event_source *s = NULL;
1339 int r;
1340
1341 assert_return(e, -EINVAL);
1342 assert_return(e = event_resolve(e), -ENOPKG);
1343 assert_return(pid > 1, -EINVAL);
1344 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1345 assert_return(options != 0, -EINVAL);
1346 assert_return(callback, -EINVAL);
1347 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1348 assert_return(!event_pid_changed(e), -ECHILD);
1349
1350 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1351 if (r < 0)
1352 return r;
1353
1354 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1355 return -EBUSY;
1356
1357 s = source_new(e, !ret, SOURCE_CHILD);
1358 if (!s)
1359 return -ENOMEM;
1360
1361 s->child.pid = pid;
1362 s->child.options = options;
1363 s->child.callback = callback;
1364 s->userdata = userdata;
1365 s->enabled = SD_EVENT_ONESHOT;
1366
1367 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1368 if (r < 0)
1369 return r;
1370
1371 e->n_enabled_child_sources++;
1372
1373 r = event_make_signal_data(e, SIGCHLD, NULL);
1374 if (r < 0) {
1375 e->n_enabled_child_sources--;
1376 return r;
1377 }
1378
1379 e->need_process_child = true;
1380
1381 if (ret)
1382 *ret = s;
1383 TAKE_PTR(s);
1384
1385 return 0;
1386 }
1387
1388 _public_ int sd_event_add_defer(
1389 sd_event *e,
1390 sd_event_source **ret,
1391 sd_event_handler_t callback,
1392 void *userdata) {
1393
1394 _cleanup_(source_freep) sd_event_source *s = NULL;
1395 int r;
1396
1397 assert_return(e, -EINVAL);
1398 assert_return(e = event_resolve(e), -ENOPKG);
1399 assert_return(callback, -EINVAL);
1400 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1401 assert_return(!event_pid_changed(e), -ECHILD);
1402
1403 s = source_new(e, !ret, SOURCE_DEFER);
1404 if (!s)
1405 return -ENOMEM;
1406
1407 s->defer.callback = callback;
1408 s->userdata = userdata;
1409 s->enabled = SD_EVENT_ONESHOT;
1410
1411 r = source_set_pending(s, true);
1412 if (r < 0)
1413 return r;
1414
1415 if (ret)
1416 *ret = s;
1417 TAKE_PTR(s);
1418
1419 return 0;
1420 }
1421
1422 _public_ int sd_event_add_post(
1423 sd_event *e,
1424 sd_event_source **ret,
1425 sd_event_handler_t callback,
1426 void *userdata) {
1427
1428 _cleanup_(source_freep) sd_event_source *s = NULL;
1429 int r;
1430
1431 assert_return(e, -EINVAL);
1432 assert_return(e = event_resolve(e), -ENOPKG);
1433 assert_return(callback, -EINVAL);
1434 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1435 assert_return(!event_pid_changed(e), -ECHILD);
1436
1437 r = set_ensure_allocated(&e->post_sources, NULL);
1438 if (r < 0)
1439 return r;
1440
1441 s = source_new(e, !ret, SOURCE_POST);
1442 if (!s)
1443 return -ENOMEM;
1444
1445 s->post.callback = callback;
1446 s->userdata = userdata;
1447 s->enabled = SD_EVENT_ON;
1448
1449 r = set_put(e->post_sources, s);
1450 if (r < 0)
1451 return r;
1452
1453 if (ret)
1454 *ret = s;
1455 TAKE_PTR(s);
1456
1457 return 0;
1458 }
1459
1460 _public_ int sd_event_add_exit(
1461 sd_event *e,
1462 sd_event_source **ret,
1463 sd_event_handler_t callback,
1464 void *userdata) {
1465
1466 _cleanup_(source_freep) sd_event_source *s = NULL;
1467 int r;
1468
1469 assert_return(e, -EINVAL);
1470 assert_return(e = event_resolve(e), -ENOPKG);
1471 assert_return(callback, -EINVAL);
1472 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1473 assert_return(!event_pid_changed(e), -ECHILD);
1474
1475 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1476 if (r < 0)
1477 return r;
1478
1479 s = source_new(e, !ret, SOURCE_EXIT);
1480 if (!s)
1481 return -ENOMEM;
1482
1483 s->exit.callback = callback;
1484 s->userdata = userdata;
1485 s->exit.prioq_index = PRIOQ_IDX_NULL;
1486 s->enabled = SD_EVENT_ONESHOT;
1487
1488 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1489 if (r < 0)
1490 return r;
1491
1492 if (ret)
1493 *ret = s;
1494 TAKE_PTR(s);
1495
1496 return 0;
1497 }
1498
1499 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1500 assert(e);
1501
1502 if (!d)
1503 return;
1504
1505 assert(hashmap_isempty(d->inodes));
1506 assert(hashmap_isempty(d->wd));
1507
1508 if (d->buffer_filled > 0)
1509 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1510
1511 hashmap_free(d->inodes);
1512 hashmap_free(d->wd);
1513
1514 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1515
1516 if (d->fd >= 0) {
1517 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1518 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1519
1520 safe_close(d->fd);
1521 }
1522 free(d);
1523 }
1524
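/* Looks up (or allocates) the per-priority inotify object, creating its inotify fd and registering it with
 * the epoll instance. Returns > 0 if a new object was allocated, 0 if one existed already. */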
1525 static int event_make_inotify_data(
1526 sd_event *e,
1527 int64_t priority,
1528 struct inotify_data **ret) {
1529
1530 _cleanup_close_ int fd = -1;
1531 struct inotify_data *d;
1532 struct epoll_event ev;
1533 int r;
1534
1535 assert(e);
1536
1537 d = hashmap_get(e->inotify_data, &priority);
1538 if (d) {
1539 if (ret)
1540 *ret = d;
1541 return 0;
1542 }
1543
1544 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1545 if (fd < 0)
1546 return -errno;
1547
1548 fd = fd_move_above_stdio(fd);
1549
1550 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1551 if (r < 0)
1552 return r;
1553
1554 d = new(struct inotify_data, 1);
1555 if (!d)
1556 return -ENOMEM;
1557
1558 *d = (struct inotify_data) {
1559 .wakeup = WAKEUP_INOTIFY_DATA,
1560 .fd = TAKE_FD(fd),
1561 .priority = priority,
1562 };
1563
1564 r = hashmap_put(e->inotify_data, &d->priority, d);
1565 if (r < 0) {
1566 d->fd = safe_close(d->fd);
1567 free(d);
1568 return r;
1569 }
1570
1571 ev = (struct epoll_event) {
1572 .events = EPOLLIN,
1573 .data.ptr = d,
1574 };
1575
1576 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1577 r = -errno;
1578 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1579 * remove the fd from the epoll first, which we don't want as we couldn't
1580 * add it in the first place. */
1581 event_free_inotify_data(e, d);
1582 return r;
1583 }
1584
1585 if (ret)
1586 *ret = d;
1587
1588 return 1;
1589 }
1590
1591 static int inode_data_compare(const void *a, const void *b) {
1592 const struct inode_data *x = a, *y = b;
1593
1594 assert(x);
1595 assert(y);
1596
1597 if (x->dev < y->dev)
1598 return -1;
1599 if (x->dev > y->dev)
1600 return 1;
1601
1602 if (x->ino < y->ino)
1603 return -1;
1604 if (x->ino > y->ino)
1605 return 1;
1606
1607 return 0;
1608 }
1609
1610 static void inode_data_hash_func(const void *p, struct siphash *state) {
1611 const struct inode_data *d = p;
1612
1613 assert(p);
1614
1615 siphash24_compress(&d->dev, sizeof(d->dev), state);
1616 siphash24_compress(&d->ino, sizeof(d->ino), state);
1617 }
1618
1619 const struct hash_ops inode_data_hash_ops = {
1620 .hash = inode_data_hash_func,
1621 .compare = inode_data_compare
1622 };
1623
1624 static void event_free_inode_data(
1625 sd_event *e,
1626 struct inode_data *d) {
1627
1628 assert(e);
1629
1630 if (!d)
1631 return;
1632
1633 assert(!d->event_sources);
1634
1635 if (d->fd >= 0) {
1636 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1637 safe_close(d->fd);
1638 }
1639
1640 if (d->inotify_data) {
1641
1642 if (d->wd >= 0) {
1643 if (d->inotify_data->fd >= 0) {
1644 /* So here's a problem. At the time this runs the watch descriptor might already be
1645 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1646 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1647 * likely case to happen. */
1648
1649 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1650 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1651 }
1652
1653 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1654 }
1655
1656 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1657 }
1658
1659 free(d);
1660 }
1661
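/* Releases the inode data object if no event source references it anymore, and drops the owning inotify
 * object too if it has no inodes left. */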
1662 static void event_gc_inode_data(
1663 sd_event *e,
1664 struct inode_data *d) {
1665
1666 struct inotify_data *inotify_data;
1667
1668 assert(e);
1669
1670 if (!d)
1671 return;
1672
1673 if (d->event_sources)
1674 return;
1675
1676 inotify_data = d->inotify_data;
1677 event_free_inode_data(e, d);
1678
1679 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1680 event_free_inotify_data(e, inotify_data);
1681 }
1682
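/* Looks up (or allocates) the inode_data object for the specified device/inode pair within the given
 * inotify object. Returns > 0 if a new object was allocated, 0 if one existed already. */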
1683 static int event_make_inode_data(
1684 sd_event *e,
1685 struct inotify_data *inotify_data,
1686 dev_t dev,
1687 ino_t ino,
1688 struct inode_data **ret) {
1689
1690 struct inode_data *d, key;
1691 int r;
1692
1693 assert(e);
1694 assert(inotify_data);
1695
1696 key = (struct inode_data) {
1697 .ino = ino,
1698 .dev = dev,
1699 };
1700
1701 d = hashmap_get(inotify_data->inodes, &key);
1702 if (d) {
1703 if (ret)
1704 *ret = d;
1705
1706 return 0;
1707 }
1708
1709 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1710 if (r < 0)
1711 return r;
1712
1713 d = new(struct inode_data, 1);
1714 if (!d)
1715 return -ENOMEM;
1716
1717 *d = (struct inode_data) {
1718 .dev = dev,
1719 .ino = ino,
1720 .wd = -1,
1721 .fd = -1,
1722 .inotify_data = inotify_data,
1723 };
1724
1725 r = hashmap_put(inotify_data->inodes, d, d);
1726 if (r < 0) {
1727 free(d);
1728 return r;
1729 }
1730
1731 if (ret)
1732 *ret = d;
1733
1734 return 1;
1735 }
1736
1737 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1738 bool excl_unlink = true;
1739 uint32_t combined = 0;
1740 sd_event_source *s;
1741
1742 assert(d);
1743
1744 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1745 * the IN_EXCL_UNLINK flag is ANDed instead.
1746 *
1747 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1748 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1749 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1750 * events we don't care for client-side. */
1751
1752 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1753
1754 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1755 excl_unlink = false;
1756
1757 combined |= s->inotify.mask;
1758 }
1759
1760 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1761 }
1762
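/* Makes sure the inotify watch for this inode matches the combined mask of all event sources subscribed to
 * it, adding or updating the watch descriptor as necessary. */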
1763 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1764 uint32_t combined_mask;
1765 int wd, r;
1766
1767 assert(d);
1768 assert(d->fd >= 0);
1769
1770 combined_mask = inode_data_determine_mask(d);
1771
1772 if (d->wd >= 0 && combined_mask == d->combined_mask)
1773 return 0;
1774
1775 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1776 if (r < 0)
1777 return r;
1778
1779 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1780 if (wd < 0)
1781 return -errno;
1782
1783 if (d->wd < 0) {
1784 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1785 if (r < 0) {
1786 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1787 return r;
1788 }
1789
1790 d->wd = wd;
1791
1792 } else if (d->wd != wd) {
1793
1794 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1795 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1796 return -EINVAL;
1797 }
1798
1799 d->combined_mask = combined_mask;
1800 return 1;
1801 }
1802
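/* Illustrative usage sketch (not part of this file; given an sd_event object 'e', the path and handler are
 * placeholders): watch a directory for newly created or moved-in files. The handler receives the raw
 * struct inotify_event, i.e. ev->mask, and ev->name if ev->len > 0:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             return 0;
 *     }
 *
 *     assert_se(sd_event_add_inotify(e, NULL, "/tmp/watched", IN_CREATE|IN_MOVED_TO, on_inotify, NULL) >= 0);
 */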
1803 _public_ int sd_event_add_inotify(
1804 sd_event *e,
1805 sd_event_source **ret,
1806 const char *path,
1807 uint32_t mask,
1808 sd_event_inotify_handler_t callback,
1809 void *userdata) {
1810
1811 struct inotify_data *inotify_data = NULL;
1812 struct inode_data *inode_data = NULL;
1813 _cleanup_close_ int fd = -1;
1814 _cleanup_(source_freep) sd_event_source *s = NULL;
1815 struct stat st;
1816 int r;
1817
1818 assert_return(e, -EINVAL);
1819 assert_return(e = event_resolve(e), -ENOPKG);
1820 assert_return(path, -EINVAL);
1821 assert_return(callback, -EINVAL);
1822 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1823 assert_return(!event_pid_changed(e), -ECHILD);
1824
1825 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1826 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1827 * the user can't use them for us. */
1828 if (mask & IN_MASK_ADD)
1829 return -EINVAL;
1830
1831 fd = open(path, O_PATH|O_CLOEXEC|
1832 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1833 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1834 if (fd < 0)
1835 return -errno;
1836
1837 if (fstat(fd, &st) < 0)
1838 return -errno;
1839
1840 s = source_new(e, !ret, SOURCE_INOTIFY);
1841 if (!s)
1842 return -ENOMEM;
1843
1844 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1845 s->inotify.mask = mask;
1846 s->inotify.callback = callback;
1847 s->userdata = userdata;
1848
1849 /* Allocate an inotify object for this priority, and an inode object within it */
1850 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1851 if (r < 0)
1852 return r;
1853
1854 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1855 if (r < 0) {
1856 event_free_inotify_data(e, inotify_data);
1857 return r;
1858 }
1859
1860 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1861 * the event source until then, for which we need the original inode. */
1862 if (inode_data->fd < 0) {
1863 inode_data->fd = TAKE_FD(fd);
1864 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1865 }
1866
1867 /* Link our event source to the inode data object */
1868 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1869 s->inotify.inode_data = inode_data;
1870
1871 /* Actually realize the watch now */
1872 r = inode_data_realize_watch(e, inode_data);
1873 if (r < 0)
1874 return r;
1875
1876 (void) sd_event_source_set_description(s, path);
1877
1878 if (ret)
1879 *ret = s;
1880 TAKE_PTR(s);
1881
1882 return 0;
1883 }
1884
1885 static sd_event_source* event_source_free(sd_event_source *s) {
1886 if (!s)
1887 return NULL;
1888
1889 /* Here's a special hack: when we are called from a
1890 * dispatch handler we won't free the event source
1891 * immediately, but we will detach the fd from the
1892 * epoll. This way it is safe for the caller to unref
1893 * the event source and immediately close the fd, but
1894 * we still retain a valid event source object after
1895 * the callback. */
1896
1897 if (s->dispatching) {
1898 if (s->type == SOURCE_IO)
1899 source_io_unregister(s);
1900
1901 source_disconnect(s);
1902 } else
1903 source_free(s);
1904
1905 return NULL;
1906 }
1907
1908 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1909
1910 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1911 assert_return(s, -EINVAL);
1912 assert_return(!event_pid_changed(s->event), -ECHILD);
1913
1914 return free_and_strdup(&s->description, description);
1915 }
1916
1917 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1918 assert_return(s, -EINVAL);
1919 assert_return(description, -EINVAL);
1920 assert_return(s->description, -ENXIO);
1921 assert_return(!event_pid_changed(s->event), -ECHILD);
1922
1923 *description = s->description;
1924 return 0;
1925 }
1926
1927 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1928 assert_return(s, NULL);
1929
1930 return s->event;
1931 }
1932
1933 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1934 assert_return(s, -EINVAL);
1935 assert_return(s->type != SOURCE_EXIT, -EDOM);
1936 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1937 assert_return(!event_pid_changed(s->event), -ECHILD);
1938
1939 return s->pending;
1940 }
1941
1942 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1943 assert_return(s, -EINVAL);
1944 assert_return(s->type == SOURCE_IO, -EDOM);
1945 assert_return(!event_pid_changed(s->event), -ECHILD);
1946
1947 return s->io.fd;
1948 }
1949
1950 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1951 int r;
1952
1953 assert_return(s, -EINVAL);
1954 assert_return(fd >= 0, -EBADF);
1955 assert_return(s->type == SOURCE_IO, -EDOM);
1956 assert_return(!event_pid_changed(s->event), -ECHILD);
1957
1958 if (s->io.fd == fd)
1959 return 0;
1960
1961 if (s->enabled == SD_EVENT_OFF) {
1962 s->io.fd = fd;
1963 s->io.registered = false;
1964 } else {
1965 int saved_fd;
1966
1967 saved_fd = s->io.fd;
1968 assert(s->io.registered);
1969
1970 s->io.fd = fd;
1971 s->io.registered = false;
1972
1973 r = source_io_register(s, s->enabled, s->io.events);
1974 if (r < 0) {
1975 s->io.fd = saved_fd;
1976 s->io.registered = true;
1977 return r;
1978 }
1979
1980 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1981 }
1982
1983 return 0;
1984 }
1985
1986 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1987 assert_return(s, -EINVAL);
1988 assert_return(s->type == SOURCE_IO, -EDOM);
1989
1990 return s->io.owned;
1991 }
1992
1993 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1994 assert_return(s, -EINVAL);
1995 assert_return(s->type == SOURCE_IO, -EDOM);
1996
1997 s->io.owned = own;
1998 return 0;
1999 }
2000
2001 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2002 assert_return(s, -EINVAL);
2003 assert_return(events, -EINVAL);
2004 assert_return(s->type == SOURCE_IO, -EDOM);
2005 assert_return(!event_pid_changed(s->event), -ECHILD);
2006
2007 *events = s->io.events;
2008 return 0;
2009 }
2010
2011 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2012 int r;
2013
2014 assert_return(s, -EINVAL);
2015 assert_return(s->type == SOURCE_IO, -EDOM);
2016 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2017 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2018 assert_return(!event_pid_changed(s->event), -ECHILD);
2019
2020 /* edge-triggered updates are never skipped, so we can reset edges */
2021 if (s->io.events == events && !(events & EPOLLET))
2022 return 0;
2023
2024 r = source_set_pending(s, false);
2025 if (r < 0)
2026 return r;
2027
2028 if (s->enabled != SD_EVENT_OFF) {
2029 r = source_io_register(s, s->enabled, events);
2030 if (r < 0)
2031 return r;
2032 }
2033
2034 s->io.events = events;
2035
2036 return 0;
2037 }
2038
2039 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2040 assert_return(s, -EINVAL);
2041 assert_return(revents, -EINVAL);
2042 assert_return(s->type == SOURCE_IO, -EDOM);
2043 assert_return(s->pending, -ENODATA);
2044 assert_return(!event_pid_changed(s->event), -ECHILD);
2045
2046 *revents = s->io.revents;
2047 return 0;
2048 }
2049
2050 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2051 assert_return(s, -EINVAL);
2052 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2053 assert_return(!event_pid_changed(s->event), -ECHILD);
2054
2055 return s->signal.sig;
2056 }
2057
2058 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2059 assert_return(s, -EINVAL);
2060 assert_return(!event_pid_changed(s->event), -ECHILD);
2061
2062 *priority = s->priority;
2063 return 0;
2064 }
2065
2066 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2067 bool rm_inotify = false, rm_inode = false;
2068 struct inotify_data *new_inotify_data = NULL;
2069 struct inode_data *new_inode_data = NULL;
2070 int r;
2071
2072 assert_return(s, -EINVAL);
2073 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2074 assert_return(!event_pid_changed(s->event), -ECHILD);
2075
2076 if (s->priority == priority)
2077 return 0;
2078
2079 if (s->type == SOURCE_INOTIFY) {
2080 struct inode_data *old_inode_data;
2081
2082 assert(s->inotify.inode_data);
2083 old_inode_data = s->inotify.inode_data;
2084
2085 /* We need the original fd to change the priority. If we don't have it we can't change the priority
2086 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2087 * events we allow priority changes only until the first following iteration. */
2088 if (old_inode_data->fd < 0)
2089 return -EOPNOTSUPP;
2090
2091 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2092 if (r < 0)
2093 return r;
2094 rm_inotify = r > 0;
2095
2096 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2097 if (r < 0)
2098 goto fail;
2099 rm_inode = r > 0;
2100
2101 if (new_inode_data->fd < 0) {
2102 /* Duplicate the fd for the new inode object if we don't have any yet */
2103 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2104 if (new_inode_data->fd < 0) {
2105 r = -errno;
2106 goto fail;
2107 }
2108
2109 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2110 }
2111
2112 /* Move the event source to the new inode data structure */
2113 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2114 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2115 s->inotify.inode_data = new_inode_data;
2116
2117 /* Now create the new watch */
2118 r = inode_data_realize_watch(s->event, new_inode_data);
2119 if (r < 0) {
2120 /* Move it back */
2121 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2122 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2123 s->inotify.inode_data = old_inode_data;
2124 goto fail;
2125 }
2126
2127 s->priority = priority;
2128
2129 event_gc_inode_data(s->event, old_inode_data);
2130
2131 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2132 struct signal_data *old, *d;
2133
2134 /* Move us from the signalfd belonging to the old
2135 * priority to the signalfd of the new priority */
2136
2137 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2138
2139 s->priority = priority;
2140
2141 r = event_make_signal_data(s->event, s->signal.sig, &d);
2142 if (r < 0) {
2143 s->priority = old->priority;
2144 return r;
2145 }
2146
2147 event_unmask_signal_data(s->event, old, s->signal.sig);
2148 } else
2149 s->priority = priority;
2150
2151 if (s->pending)
2152 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2153
2154 if (s->prepare)
2155 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2156
2157 if (s->type == SOURCE_EXIT)
2158 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2159
2160 return 0;
2161
2162 fail:
2163 if (rm_inode)
2164 event_free_inode_data(s->event, new_inode_data);
2165
2166 if (rm_inotify)
2167 event_free_inotify_data(s->event, new_inotify_data);
2168
2169 return r;
2170 }
2171
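/*
 * Illustrative caller-side sketch (the "source" variable is hypothetical):
 * raising the priority of an event source so it is dispatched ahead of
 * sources that stay at SD_EVENT_PRIORITY_NORMAL:
 *
 *         r = sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IMPORTANT);
 *         if (r < 0)
 *                 return r;
 *
 * Note that for inotify sources this is only possible until the first event
 * loop iteration after the source was created, as explained above.
 */
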
2172 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2173 assert_return(s, -EINVAL);
2174 assert_return(m, -EINVAL);
2175 assert_return(!event_pid_changed(s->event), -ECHILD);
2176
2177 *m = s->enabled;
2178 return 0;
2179 }
2180
2181 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2182 int r;
2183
2184 assert_return(s, -EINVAL);
2185 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2186 assert_return(!event_pid_changed(s->event), -ECHILD);
2187
2188 /* If we are dead anyway, we are fine with turning off
2189 * sources, but everything else needs to fail. */
2190 if (s->event->state == SD_EVENT_FINISHED)
2191 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2192
2193 if (s->enabled == m)
2194 return 0;
2195
2196 if (m == SD_EVENT_OFF) {
2197
2198 /* Unset the pending flag when this event source is disabled */
2199 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2200 r = source_set_pending(s, false);
2201 if (r < 0)
2202 return r;
2203 }
2204
2205 switch (s->type) {
2206
2207 case SOURCE_IO:
2208 source_io_unregister(s);
2209 s->enabled = m;
2210 break;
2211
2212 case SOURCE_TIME_REALTIME:
2213 case SOURCE_TIME_BOOTTIME:
2214 case SOURCE_TIME_MONOTONIC:
2215 case SOURCE_TIME_REALTIME_ALARM:
2216 case SOURCE_TIME_BOOTTIME_ALARM: {
2217 struct clock_data *d;
2218
2219 s->enabled = m;
2220 d = event_get_clock_data(s->event, s->type);
2221 assert(d);
2222
2223 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2224 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2225 d->needs_rearm = true;
2226 break;
2227 }
2228
2229 case SOURCE_SIGNAL:
2230 s->enabled = m;
2231
2232 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2233 break;
2234
2235 case SOURCE_CHILD:
2236 s->enabled = m;
2237
2238 assert(s->event->n_enabled_child_sources > 0);
2239 s->event->n_enabled_child_sources--;
2240
2241 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2242 break;
2243
2244 case SOURCE_EXIT:
2245 s->enabled = m;
2246 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2247 break;
2248
2249 case SOURCE_DEFER:
2250 case SOURCE_POST:
2251 case SOURCE_INOTIFY:
2252 s->enabled = m;
2253 break;
2254
2255 default:
2256 assert_not_reached("Wut? I shouldn't exist.");
2257 }
2258
2259 } else {
2260
2261 /* Unset the pending flag when this event source is enabled */
2262 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2263 r = source_set_pending(s, false);
2264 if (r < 0)
2265 return r;
2266 }
2267
2268 switch (s->type) {
2269
2270 case SOURCE_IO:
2271 r = source_io_register(s, m, s->io.events);
2272 if (r < 0)
2273 return r;
2274
2275 s->enabled = m;
2276 break;
2277
2278 case SOURCE_TIME_REALTIME:
2279 case SOURCE_TIME_BOOTTIME:
2280 case SOURCE_TIME_MONOTONIC:
2281 case SOURCE_TIME_REALTIME_ALARM:
2282 case SOURCE_TIME_BOOTTIME_ALARM: {
2283 struct clock_data *d;
2284
2285 s->enabled = m;
2286 d = event_get_clock_data(s->event, s->type);
2287 assert(d);
2288
2289 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2290 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2291 d->needs_rearm = true;
2292 break;
2293 }
2294
2295 case SOURCE_SIGNAL:
2296
2297 s->enabled = m;
2298
2299 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2300 if (r < 0) {
2301 s->enabled = SD_EVENT_OFF;
2302 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2303 return r;
2304 }
2305
2306 break;
2307
2308 case SOURCE_CHILD:
2309
2310 if (s->enabled == SD_EVENT_OFF)
2311 s->event->n_enabled_child_sources++;
2312
2313 s->enabled = m;
2314
2315 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2316 if (r < 0) {
2317 s->enabled = SD_EVENT_OFF;
2318 s->event->n_enabled_child_sources--;
2319 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2320 return r;
2321 }
2322
2323 break;
2324
2325 case SOURCE_EXIT:
2326 s->enabled = m;
2327 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2328 break;
2329
2330 case SOURCE_DEFER:
2331 case SOURCE_POST:
2332 case SOURCE_INOTIFY:
2333 s->enabled = m;
2334 break;
2335
2336 default:
2337 assert_not_reached("Wut? I shouldn't exist.");
2338 }
2339 }
2340
2341 if (s->pending)
2342 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2343
2344 if (s->prepare)
2345 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2346
2347 return 0;
2348 }
2349
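/*
 * Illustrative caller-side sketch (the "source" variable is hypothetical):
 * pausing an event source and resuming it later, e.g. to stop watching an fd
 * while a request is being processed:
 *
 *         r = sd_event_source_set_enabled(source, SD_EVENT_OFF);
 *         ...
 *         r = sd_event_source_set_enabled(source, SD_EVENT_ON);
 *
 * SD_EVENT_ONESHOT enables the source for a single dispatch only; as
 * implemented in source_dispatch() below, such a source is switched back to
 * SD_EVENT_OFF right before its callback is invoked.
 */
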
2350 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2351 assert_return(s, -EINVAL);
2352 assert_return(usec, -EINVAL);
2353 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2354 assert_return(!event_pid_changed(s->event), -ECHILD);
2355
2356 *usec = s->time.next;
2357 return 0;
2358 }
2359
2360 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2361 struct clock_data *d;
2362 int r;
2363
2364 assert_return(s, -EINVAL);
2365 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2366 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2367 assert_return(!event_pid_changed(s->event), -ECHILD);
2368
2369 r = source_set_pending(s, false);
2370 if (r < 0)
2371 return r;
2372
2373 s->time.next = usec;
2374
2375 d = event_get_clock_data(s->event, s->type);
2376 assert(d);
2377
2378 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2379 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2380 d->needs_rearm = true;
2381
2382 return 0;
2383 }
2384
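/*
 * The time set here is absolute on the source's clock. An illustrative
 * caller-side sketch (the "timer_source" variable is hypothetical) for
 * re-arming a CLOCK_MONOTONIC timer source five seconds from now:
 *
 *         uint64_t now_usec;
 *
 *         r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_source_set_time(timer_source, now_usec + 5 * USEC_PER_SEC);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_source_set_enabled(timer_source, SD_EVENT_ONESHOT);
 */
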
2385 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2386 assert_return(s, -EINVAL);
2387 assert_return(usec, -EINVAL);
2388 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2389 assert_return(!event_pid_changed(s->event), -ECHILD);
2390
2391 *usec = s->time.accuracy;
2392 return 0;
2393 }
2394
2395 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2396 struct clock_data *d;
2397 int r;
2398
2399 assert_return(s, -EINVAL);
2400 assert_return(usec != (uint64_t) -1, -EINVAL);
2401 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2402 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2403 assert_return(!event_pid_changed(s->event), -ECHILD);
2404
2405 r = source_set_pending(s, false);
2406 if (r < 0)
2407 return r;
2408
2409 if (usec == 0)
2410 usec = DEFAULT_ACCURACY_USEC;
2411
2412 s->time.accuracy = usec;
2413
2414 d = event_get_clock_data(s->event, s->type);
2415 assert(d);
2416
2417 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2418 d->needs_rearm = true;
2419
2420 return 0;
2421 }
2422
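/*
 * The accuracy specifies by how much a timer's wakeup may be delayed so that
 * it can be coalesced with other wakeups (see event_arm_timer() and
 * sleep_between() below). For example, a coarse housekeeping timer that may
 * fire up to a minute late could use (hypothetical "timer_source" variable):
 *
 *         r = sd_event_source_set_time_accuracy(timer_source, USEC_PER_MINUTE);
 */
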
2423 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2424 assert_return(s, -EINVAL);
2425 assert_return(clock, -EINVAL);
2426 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2427 assert_return(!event_pid_changed(s->event), -ECHILD);
2428
2429 *clock = event_source_type_to_clock(s->type);
2430 return 0;
2431 }
2432
2433 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2434 assert_return(s, -EINVAL);
2435 assert_return(pid, -EINVAL);
2436 assert_return(s->type == SOURCE_CHILD, -EDOM);
2437 assert_return(!event_pid_changed(s->event), -ECHILD);
2438
2439 *pid = s->child.pid;
2440 return 0;
2441 }
2442
2443 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2444 assert_return(s, -EINVAL);
2445 assert_return(mask, -EINVAL);
2446 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2447 assert_return(!event_pid_changed(s->event), -ECHILD);
2448
2449 *mask = s->inotify.mask;
2450 return 0;
2451 }
2452
2453 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2454 int r;
2455
2456 assert_return(s, -EINVAL);
2457 assert_return(s->type != SOURCE_EXIT, -EDOM);
2458 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2459 assert_return(!event_pid_changed(s->event), -ECHILD);
2460
2461 if (s->prepare == callback)
2462 return 0;
2463
2464 if (callback && s->prepare) {
2465 s->prepare = callback;
2466 return 0;
2467 }
2468
2469 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2470 if (r < 0)
2471 return r;
2472
2473 s->prepare = callback;
2474
2475 if (callback) {
2476 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2477 if (r < 0)
2478 return r;
2479 } else
2480 prioq_remove(s->event->prepare, s, &s->prepare_index);
2481
2482 return 0;
2483 }
2484
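/*
 * Illustrative caller-side sketch (the callback, the Connection type and the
 * helper are hypothetical): a prepare callback runs at the start of each event
 * loop iteration for enabled sources, right before the loop goes to sleep, and
 * can be used for just-in-time work such as flushing buffered output:
 *
 *         static int prepare_cb(sd_event_source *s, void *userdata) {
 *                 Connection *c = userdata;
 *
 *                 return connection_flush_outgoing(c);
 *         }
 *
 *         r = sd_event_source_set_prepare(source, prepare_cb);
 */
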
2485 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2486 assert_return(s, NULL);
2487
2488 return s->userdata;
2489 }
2490
2491 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2492 void *ret;
2493
2494 assert_return(s, NULL);
2495
2496 ret = s->userdata;
2497 s->userdata = userdata;
2498
2499 return ret;
2500 }
2501
2502 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2503 usec_t c;
2504 assert(e);
2505 assert(a <= b);
2506
2507 if (a <= 0)
2508 return 0;
2509 if (a >= USEC_INFINITY)
2510 return USEC_INFINITY;
2511
2512 if (b <= a + 1)
2513 return a;
2514
2515 initialize_perturb(e);
2516
2517 /*
2518 Find a good time to wake up again between times a and b. We
2519 have two goals here:
2520
2521 a) We want to wake up as seldom as possible, hence prefer
2522 later times over earlier times.
2523
2524 b) But if we have to wake up, then let's make sure to
2525 dispatch as much as possible on the entire system.
2526
2527 We implement this by waking up everywhere at the same time
2528 within any given minute if we can, synchronised via the
2529 perturbation value determined from the boot ID. If we can't,
2530 then we try to find the same spot in every 10s, then 1s and
2531 then 250ms steps. Otherwise, we pick the last possible time
2532 to wake up.
2533 */
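
/*
 Worked example (illustrative numbers): with a perturbation of 17s and a
 window of a = 100s, b = 130s (relative to the clock's epoch), the
 per-minute spot is 2*60s + 17s = 137s, which lies past b, and 137s - 60s
 = 77s lies before a, so the minute step fails; the 10s step then yields
 13*10s + (17s % 10s) = 137s, again past b, and 137s - 10s = 127s falls
 inside [a, b], so we would wake up at 127s.
 */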
2534
2535 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2536 if (c >= b) {
2537 if (_unlikely_(c < USEC_PER_MINUTE))
2538 return b;
2539
2540 c -= USEC_PER_MINUTE;
2541 }
2542
2543 if (c >= a)
2544 return c;
2545
2546 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2547 if (c >= b) {
2548 if (_unlikely_(c < USEC_PER_SEC*10))
2549 return b;
2550
2551 c -= USEC_PER_SEC*10;
2552 }
2553
2554 if (c >= a)
2555 return c;
2556
2557 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2558 if (c >= b) {
2559 if (_unlikely_(c < USEC_PER_SEC))
2560 return b;
2561
2562 c -= USEC_PER_SEC;
2563 }
2564
2565 if (c >= a)
2566 return c;
2567
2568 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2569 if (c >= b) {
2570 if (_unlikely_(c < USEC_PER_MSEC*250))
2571 return b;
2572
2573 c -= USEC_PER_MSEC*250;
2574 }
2575
2576 if (c >= a)
2577 return c;
2578
2579 return b;
2580 }
2581
2582 static int event_arm_timer(
2583 sd_event *e,
2584 struct clock_data *d) {
2585
2586 struct itimerspec its = {};
2587 sd_event_source *a, *b;
2588 usec_t t;
2589 int r;
2590
2591 assert(e);
2592 assert(d);
2593
2594 if (!d->needs_rearm)
2595 return 0;
2596 else
2597 d->needs_rearm = false;
2598
2599 a = prioq_peek(d->earliest);
2600 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2601
2602 if (d->fd < 0)
2603 return 0;
2604
2605 if (d->next == USEC_INFINITY)
2606 return 0;
2607
2608 /* disarm */
2609 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2610 if (r < 0)
2611 return r;
2612
2613 d->next = USEC_INFINITY;
2614 return 0;
2615 }
2616
2617 b = prioq_peek(d->latest);
2618 assert_se(b && b->enabled != SD_EVENT_OFF);
2619
2620 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2621 if (d->next == t)
2622 return 0;
2623
2624 assert_se(d->fd >= 0);
2625
2626 if (t == 0) {
2627 /* We don't want to disarm here, we just mean some time looooong ago. */
2628 its.it_value.tv_sec = 0;
2629 its.it_value.tv_nsec = 1;
2630 } else
2631 timespec_store(&its.it_value, t);
2632
2633 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2634 if (r < 0)
2635 return -errno;
2636
2637 d->next = t;
2638 return 0;
2639 }
2640
2641 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2642 assert(e);
2643 assert(s);
2644 assert(s->type == SOURCE_IO);
2645
2646 /* If the event source was already pending, we just OR in the
2647 * new revents, otherwise we reset the value. The ORing is
2648 * necessary to handle EPOLLONESHOT events properly where
2649 * readability might happen independently of writability, and
2650 * we need to keep track of both */
2651
2652 if (s->pending)
2653 s->io.revents |= revents;
2654 else
2655 s->io.revents = revents;
2656
2657 return source_set_pending(s, true);
2658 }
2659
2660 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2661 uint64_t x;
2662 ssize_t ss;
2663
2664 assert(e);
2665 assert(fd >= 0);
2666
2667 assert_return(events == EPOLLIN, -EIO);
2668
2669 ss = read(fd, &x, sizeof(x));
2670 if (ss < 0) {
2671 if (IN_SET(errno, EAGAIN, EINTR))
2672 return 0;
2673
2674 return -errno;
2675 }
2676
2677 if (_unlikely_(ss != sizeof(x)))
2678 return -EIO;
2679
2680 if (next)
2681 *next = USEC_INFINITY;
2682
2683 return 0;
2684 }
2685
2686 static int process_timer(
2687 sd_event *e,
2688 usec_t n,
2689 struct clock_data *d) {
2690
2691 sd_event_source *s;
2692 int r;
2693
2694 assert(e);
2695 assert(d);
2696
2697 for (;;) {
2698 s = prioq_peek(d->earliest);
2699 if (!s ||
2700 s->time.next > n ||
2701 s->enabled == SD_EVENT_OFF ||
2702 s->pending)
2703 break;
2704
2705 r = source_set_pending(s, true);
2706 if (r < 0)
2707 return r;
2708
2709 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2710 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2711 d->needs_rearm = true;
2712 }
2713
2714 return 0;
2715 }
2716
2717 static int process_child(sd_event *e) {
2718 sd_event_source *s;
2719 Iterator i;
2720 int r;
2721
2722 assert(e);
2723
2724 e->need_process_child = false;
2725
2726 /*
2727 So, this is ugly. We iteratively invoke waitid() with P_PID
2728 + WNOHANG for each PID we wait for, instead of using
2729 P_ALL. This is because we only want to get child
2730 information of very specific child processes, and not all
2731 of them. We might not have processed the SIGCHLD event of a
2732 previous invocation and we don't want to maintain an
2733 unbounded *per-child* event queue, hence we really don't
2734 want anything flushed out of the kernel's queue that we
2735 don't care about. Since this is O(n) this means that if you
2736 have a lot of processes you probably want to handle SIGCHLD
2737 yourself.
2738
2739 We do not reap the children here (by using WNOWAIT); that
2740 is only done after the event source is dispatched, so that
2741 the callback still sees the process as a zombie.
2742 */
2743
2744 HASHMAP_FOREACH(s, e->child_sources, i) {
2745 assert(s->type == SOURCE_CHILD);
2746
2747 if (s->pending)
2748 continue;
2749
2750 if (s->enabled == SD_EVENT_OFF)
2751 continue;
2752
2753 zero(s->child.siginfo);
2754 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2755 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2756 if (r < 0)
2757 return -errno;
2758
2759 if (s->child.siginfo.si_pid != 0) {
2760 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2761
2762 if (!zombie && (s->child.options & WEXITED)) {
2763 /* If the child isn't dead then let's
2764 * immediately remove the state change
2765 * from the queue, since there's no
2766 * benefit in leaving it queued */
2767
2768 assert(s->child.options & (WSTOPPED|WCONTINUED));
2769 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2770 }
2771
2772 r = source_set_pending(s, true);
2773 if (r < 0)
2774 return r;
2775 }
2776 }
2777
2778 return 0;
2779 }
2780
2781 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2782 bool read_one = false;
2783 int r;
2784
2785 assert(e);
2786 assert(d);
2787 assert_return(events == EPOLLIN, -EIO);
2788
2789 /* If there's a signal queued on this priority and SIGCHLD is
2790 on this priority too, then make sure to recheck the
2791 children we watch. This is because we only ever dequeue
2792 the first signal per priority, so if we dequeue one while
2793 SIGCHLD is (or later gets) queued behind it we wouldn't
2794 notice, even though there might be children at this priority
2795 we care about, hence we need to check them explicitly. */
2796
2797 if (sigismember(&d->sigset, SIGCHLD))
2798 e->need_process_child = true;
2799
2800 /* If there's already an event source pending for this
2801 * priority we don't read another */
2802 if (d->current)
2803 return 0;
2804
2805 for (;;) {
2806 struct signalfd_siginfo si;
2807 ssize_t n;
2808 sd_event_source *s = NULL;
2809
2810 n = read(d->fd, &si, sizeof(si));
2811 if (n < 0) {
2812 if (IN_SET(errno, EAGAIN, EINTR))
2813 return read_one;
2814
2815 return -errno;
2816 }
2817
2818 if (_unlikely_(n != sizeof(si)))
2819 return -EIO;
2820
2821 assert(SIGNAL_VALID(si.ssi_signo));
2822
2823 read_one = true;
2824
2825 if (e->signal_sources)
2826 s = e->signal_sources[si.ssi_signo];
2827 if (!s)
2828 continue;
2829 if (s->pending)
2830 continue;
2831
2832 s->signal.siginfo = si;
2833 d->current = s;
2834
2835 r = source_set_pending(s, true);
2836 if (r < 0)
2837 return r;
2838
2839 return 1;
2840 }
2841 }
2842
2843 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2844 ssize_t n;
2845
2846 assert(e);
2847 assert(d);
2848
2849 assert_return(revents == EPOLLIN, -EIO);
2850
2851 /* If there's already an event source pending for this priority, don't read another */
2852 if (d->n_pending > 0)
2853 return 0;
2854
2855 /* Is the read buffer non-empty? If so, let's not read more */
2856 if (d->buffer_filled > 0)
2857 return 0;
2858
2859 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2860 if (n < 0) {
2861 if (IN_SET(errno, EAGAIN, EINTR))
2862 return 0;
2863
2864 return -errno;
2865 }
2866
2867 assert(n > 0);
2868 d->buffer_filled = (size_t) n;
2869 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2870
2871 return 1;
2872 }
2873
2874 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2875 assert(e);
2876 assert(d);
2877 assert(sz <= d->buffer_filled);
2878
2879 if (sz == 0)
2880 return;
2881
2882 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2883 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2884 d->buffer_filled -= sz;
2885
2886 if (d->buffer_filled == 0)
2887 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2888 }
2889
2890 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2891 int r;
2892
2893 assert(e);
2894 assert(d);
2895
2896 /* If there's already an event source pending for this priority, don't read another */
2897 if (d->n_pending > 0)
2898 return 0;
2899
2900 while (d->buffer_filled > 0) {
2901 size_t sz;
2902
2903 /* Let's validate that the event structures are complete */
2904 if (d->buffer_filled < offsetof(struct inotify_event, name))
2905 return -EIO;
2906
2907 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2908 if (d->buffer_filled < sz)
2909 return -EIO;
2910
2911 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2912 struct inode_data *inode_data;
2913 Iterator i;
2914
2915 /* The queue overran, let's pass this event to all event sources connected to this inotify
2916 * object */
2917
2918 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2919 sd_event_source *s;
2920
2921 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2922
2923 if (s->enabled == SD_EVENT_OFF)
2924 continue;
2925
2926 r = source_set_pending(s, true);
2927 if (r < 0)
2928 return r;
2929 }
2930 }
2931 } else {
2932 struct inode_data *inode_data;
2933 sd_event_source *s;
2934
2935 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2936 * our watch descriptor table. */
2937 if (d->buffer.ev.mask & IN_IGNORED) {
2938
2939 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2940 if (!inode_data) {
2941 event_inotify_data_drop(e, d, sz);
2942 continue;
2943 }
2944
2945 /* The watch descriptor was removed by the kernel, let's drop it here too */
2946 inode_data->wd = -1;
2947 } else {
2948 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2949 if (!inode_data) {
2950 event_inotify_data_drop(e, d, sz);
2951 continue;
2952 }
2953 }
2954
2955 /* Trigger all event sources that are interested in these events. Also trigger all event
2956 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2957 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2958
2959 if (s->enabled == SD_EVENT_OFF)
2960 continue;
2961
2962 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2963 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2964 continue;
2965
2966 r = source_set_pending(s, true);
2967 if (r < 0)
2968 return r;
2969 }
2970 }
2971
2972 /* Something pending now? If so, let's finish, otherwise let's read more. */
2973 if (d->n_pending > 0)
2974 return 1;
2975 }
2976
2977 return 0;
2978 }
2979
2980 static int process_inotify(sd_event *e) {
2981 struct inotify_data *d;
2982 int r, done = 0;
2983
2984 assert(e);
2985
2986 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2987 r = event_inotify_data_process(e, d);
2988 if (r < 0)
2989 return r;
2990 if (r > 0)
2991 done++;
2992 }
2993
2994 return done;
2995 }
2996
2997 static int source_dispatch(sd_event_source *s) {
2998 EventSourceType saved_type;
2999 int r = 0;
3000
3001 assert(s);
3002 assert(s->pending || s->type == SOURCE_EXIT);
3003
3004 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3005 * the event source. */
3006 saved_type = s->type;
3007
3008 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3009 r = source_set_pending(s, false);
3010 if (r < 0)
3011 return r;
3012 }
3013
3014 if (s->type != SOURCE_POST) {
3015 sd_event_source *z;
3016 Iterator i;
3017
3018 /* If we execute a non-post source, let's mark all
3019 * post sources as pending */
3020
3021 SET_FOREACH(z, s->event->post_sources, i) {
3022 if (z->enabled == SD_EVENT_OFF)
3023 continue;
3024
3025 r = source_set_pending(z, true);
3026 if (r < 0)
3027 return r;
3028 }
3029 }
3030
3031 if (s->enabled == SD_EVENT_ONESHOT) {
3032 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3033 if (r < 0)
3034 return r;
3035 }
3036
3037 s->dispatching = true;
3038
3039 switch (s->type) {
3040
3041 case SOURCE_IO:
3042 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3043 break;
3044
3045 case SOURCE_TIME_REALTIME:
3046 case SOURCE_TIME_BOOTTIME:
3047 case SOURCE_TIME_MONOTONIC:
3048 case SOURCE_TIME_REALTIME_ALARM:
3049 case SOURCE_TIME_BOOTTIME_ALARM:
3050 r = s->time.callback(s, s->time.next, s->userdata);
3051 break;
3052
3053 case SOURCE_SIGNAL:
3054 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3055 break;
3056
3057 case SOURCE_CHILD: {
3058 bool zombie;
3059
3060 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3061
3062 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3063
3064 /* Now, reap the PID for good. */
3065 if (zombie)
3066 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3067
3068 break;
3069 }
3070
3071 case SOURCE_DEFER:
3072 r = s->defer.callback(s, s->userdata);
3073 break;
3074
3075 case SOURCE_POST:
3076 r = s->post.callback(s, s->userdata);
3077 break;
3078
3079 case SOURCE_EXIT:
3080 r = s->exit.callback(s, s->userdata);
3081 break;
3082
3083 case SOURCE_INOTIFY: {
3084 struct sd_event *e = s->event;
3085 struct inotify_data *d;
3086 size_t sz;
3087
3088 assert(s->inotify.inode_data);
3089 assert_se(d = s->inotify.inode_data->inotify_data);
3090
3091 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3092 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3093 assert(d->buffer_filled >= sz);
3094
3095 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3096
3097 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3098 * buffer. */
3099 if (d->n_pending == 0)
3100 event_inotify_data_drop(e, d, sz);
3101
3102 break;
3103 }
3104
3105 case SOURCE_WATCHDOG:
3106 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3107 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3108 assert_not_reached("Wut? I shouldn't exist.");
3109 }
3110
3111 s->dispatching = false;
3112
3113 if (r < 0)
3114 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3115 strna(s->description), event_source_type_to_string(saved_type));
3116
3117 if (s->n_ref == 0)
3118 source_free(s);
3119 else if (r < 0)
3120 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3121
3122 return 1;
3123 }
3124
3125 static int event_prepare(sd_event *e) {
3126 int r;
3127
3128 assert(e);
3129
3130 for (;;) {
3131 sd_event_source *s;
3132
3133 s = prioq_peek(e->prepare);
3134 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3135 break;
3136
3137 s->prepare_iteration = e->iteration;
3138 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3139 if (r < 0)
3140 return r;
3141
3142 assert(s->prepare);
3143
3144 s->dispatching = true;
3145 r = s->prepare(s, s->userdata);
3146 s->dispatching = false;
3147
3148 if (r < 0)
3149 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3150 strna(s->description), event_source_type_to_string(s->type));
3151
3152 if (s->n_ref == 0)
3153 source_free(s);
3154 else if (r < 0)
3155 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3156 }
3157
3158 return 0;
3159 }
3160
3161 static int dispatch_exit(sd_event *e) {
3162 sd_event_source *p;
3163 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3164 int r;
3165
3166 assert(e);
3167
3168 p = prioq_peek(e->exit);
3169 if (!p || p->enabled == SD_EVENT_OFF) {
3170 e->state = SD_EVENT_FINISHED;
3171 return 0;
3172 }
3173
3174 ref = sd_event_ref(e);
3175 e->iteration++;
3176 e->state = SD_EVENT_EXITING;
3177 r = source_dispatch(p);
3178 e->state = SD_EVENT_INITIAL;
3179 return r;
3180 }
3181
3182 static sd_event_source* event_next_pending(sd_event *e) {
3183 sd_event_source *p;
3184
3185 assert(e);
3186
3187 p = prioq_peek(e->pending);
3188 if (!p)
3189 return NULL;
3190
3191 if (p->enabled == SD_EVENT_OFF)
3192 return NULL;
3193
3194 return p;
3195 }
3196
3197 static int arm_watchdog(sd_event *e) {
3198 struct itimerspec its = {};
3199 usec_t t;
3200 int r;
3201
3202 assert(e);
3203 assert(e->watchdog_fd >= 0);
3204
3205 t = sleep_between(e,
3206 e->watchdog_last + (e->watchdog_period / 2),
3207 e->watchdog_last + (e->watchdog_period * 3 / 4));
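
/* For example (illustrative numbers): with WatchdogSec=30s, and hence a
 * watchdog_period of 30s, the next ping is scheduled somewhere between 15s
 * and 22.5s after the previous one, with the exact spot picked by
 * sleep_between() so that it can coalesce with other wakeups. */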
3208
3209 timespec_store(&its.it_value, t);
3210
3211 /* Make sure we never set the watchdog to 0, which tells the
3212 * kernel to disable it. */
3213 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3214 its.it_value.tv_nsec = 1;
3215
3216 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3217 if (r < 0)
3218 return -errno;
3219
3220 return 0;
3221 }
3222
3223 static int process_watchdog(sd_event *e) {
3224 assert(e);
3225
3226 if (!e->watchdog)
3227 return 0;
3228
3229 /* Don't notify watchdog too often */
3230 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3231 return 0;
3232
3233 sd_notify(false, "WATCHDOG=1");
3234 e->watchdog_last = e->timestamp.monotonic;
3235
3236 return arm_watchdog(e);
3237 }
3238
3239 static void event_close_inode_data_fds(sd_event *e) {
3240 struct inode_data *d;
3241
3242 assert(e);
3243
3244 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3245 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3246 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3247 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3248 * compromise. */
3249
3250 while ((d = e->inode_data_to_close)) {
3251 assert(d->fd >= 0);
3252 d->fd = safe_close(d->fd);
3253
3254 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3255 }
3256 }
3257
3258 _public_ int sd_event_prepare(sd_event *e) {
3259 int r;
3260
3261 assert_return(e, -EINVAL);
3262 assert_return(e = event_resolve(e), -ENOPKG);
3263 assert_return(!event_pid_changed(e), -ECHILD);
3264 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3265 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3266
3267 if (e->exit_requested)
3268 goto pending;
3269
3270 e->iteration++;
3271
3272 e->state = SD_EVENT_PREPARING;
3273 r = event_prepare(e);
3274 e->state = SD_EVENT_INITIAL;
3275 if (r < 0)
3276 return r;
3277
3278 r = event_arm_timer(e, &e->realtime);
3279 if (r < 0)
3280 return r;
3281
3282 r = event_arm_timer(e, &e->boottime);
3283 if (r < 0)
3284 return r;
3285
3286 r = event_arm_timer(e, &e->monotonic);
3287 if (r < 0)
3288 return r;
3289
3290 r = event_arm_timer(e, &e->realtime_alarm);
3291 if (r < 0)
3292 return r;
3293
3294 r = event_arm_timer(e, &e->boottime_alarm);
3295 if (r < 0)
3296 return r;
3297
3298 event_close_inode_data_fds(e);
3299
3300 if (event_next_pending(e) || e->need_process_child)
3301 goto pending;
3302
3303 e->state = SD_EVENT_ARMED;
3304
3305 return 0;
3306
3307 pending:
3308 e->state = SD_EVENT_ARMED;
3309 r = sd_event_wait(e, 0);
3310 if (r == 0)
3311 e->state = SD_EVENT_ARMED;
3312
3313 return r;
3314 }
3315
3316 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3317 struct epoll_event *ev_queue;
3318 unsigned ev_queue_max;
3319 int r, m, i;
3320
3321 assert_return(e, -EINVAL);
3322 assert_return(e = event_resolve(e), -ENOPKG);
3323 assert_return(!event_pid_changed(e), -ECHILD);
3324 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3325 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3326
3327 if (e->exit_requested) {
3328 e->state = SD_EVENT_PENDING;
3329 return 1;
3330 }
3331
3332 ev_queue_max = MAX(e->n_sources, 1u);
3333 ev_queue = newa(struct epoll_event, ev_queue_max);
3334
3335 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3336 if (e->inotify_data_buffered)
3337 timeout = 0;
3338
3339 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3340 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3341 if (m < 0) {
3342 if (errno == EINTR) {
3343 e->state = SD_EVENT_PENDING;
3344 return 1;
3345 }
3346
3347 r = -errno;
3348 goto finish;
3349 }
3350
3351 triple_timestamp_get(&e->timestamp);
3352
3353 for (i = 0; i < m; i++) {
3354
3355 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3356 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3357 else {
3358 WakeupType *t = ev_queue[i].data.ptr;
3359
3360 switch (*t) {
3361
3362 case WAKEUP_EVENT_SOURCE:
3363 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3364 break;
3365
3366 case WAKEUP_CLOCK_DATA: {
3367 struct clock_data *d = ev_queue[i].data.ptr;
3368 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3369 break;
3370 }
3371
3372 case WAKEUP_SIGNAL_DATA:
3373 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3374 break;
3375
3376 case WAKEUP_INOTIFY_DATA:
3377 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3378 break;
3379
3380 default:
3381 assert_not_reached("Invalid wake-up pointer");
3382 }
3383 }
3384 if (r < 0)
3385 goto finish;
3386 }
3387
3388 r = process_watchdog(e);
3389 if (r < 0)
3390 goto finish;
3391
3392 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3393 if (r < 0)
3394 goto finish;
3395
3396 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3397 if (r < 0)
3398 goto finish;
3399
3400 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3401 if (r < 0)
3402 goto finish;
3403
3404 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3405 if (r < 0)
3406 goto finish;
3407
3408 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3409 if (r < 0)
3410 goto finish;
3411
3412 if (e->need_process_child) {
3413 r = process_child(e);
3414 if (r < 0)
3415 goto finish;
3416 }
3417
3418 r = process_inotify(e);
3419 if (r < 0)
3420 goto finish;
3421
3422 if (event_next_pending(e)) {
3423 e->state = SD_EVENT_PENDING;
3424
3425 return 1;
3426 }
3427
3428 r = 0;
3429
3430 finish:
3431 e->state = SD_EVENT_INITIAL;
3432
3433 return r;
3434 }
3435
3436 _public_ int sd_event_dispatch(sd_event *e) {
3437 sd_event_source *p;
3438 int r;
3439
3440 assert_return(e, -EINVAL);
3441 assert_return(e = event_resolve(e), -ENOPKG);
3442 assert_return(!event_pid_changed(e), -ECHILD);
3443 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3444 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3445
3446 if (e->exit_requested)
3447 return dispatch_exit(e);
3448
3449 p = event_next_pending(e);
3450 if (p) {
3451 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3452
3453 ref = sd_event_ref(e);
3454 e->state = SD_EVENT_RUNNING;
3455 r = source_dispatch(p);
3456 e->state = SD_EVENT_INITIAL;
3457 return r;
3458 }
3459
3460 e->state = SD_EVENT_INITIAL;
3461
3462 return 1;
3463 }
3464
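/*
 * Illustrative sketch of driving the loop in separate steps instead of via
 * sd_event_run() (error handling abbreviated, variables hypothetical):
 *
 *         for (;;) {
 *                 r = sd_event_prepare(e);
 *                 if (r < 0)
 *                         return r;
 *
 *                 if (r == 0) {
 *                         r = sd_event_wait(e, (uint64_t) -1);
 *                         if (r < 0)
 *                                 return r;
 *                 }
 *
 *                 if (r > 0) {
 *                         r = sd_event_dispatch(e);
 *                         if (r < 0)
 *                                 return r;
 *                 }
 *         }
 *
 * When embedding into a foreign event loop, that loop would typically poll on
 * sd_event_get_fd() and call sd_event_wait() with a timeout of 0 once the fd
 * becomes ready.
 */
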
3465 static void event_log_delays(sd_event *e) {
3466 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3467 unsigned i;
3468 int o;
3469
3470 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3471 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3472 e->delays[i] = 0;
3473 }
3474 log_debug("Event loop iterations: %.*s", o, b);
3475 }
3476
3477 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3478 int r;
3479
3480 assert_return(e, -EINVAL);
3481 assert_return(e = event_resolve(e), -ENOPKG);
3482 assert_return(!event_pid_changed(e), -ECHILD);
3483 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3484 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3485
3486 if (e->profile_delays && e->last_run) {
3487 usec_t this_run;
3488 unsigned l;
3489
3490 this_run = now(CLOCK_MONOTONIC);
3491
3492 l = u64log2(this_run - e->last_run);
3493 assert(l < sizeof(e->delays));
3494 e->delays[l]++;
3495
3496 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3497 event_log_delays(e);
3498 e->last_log = this_run;
3499 }
3500 }
3501
3502 r = sd_event_prepare(e);
3503 if (r == 0)
3504 /* There was nothing? Then wait... */
3505 r = sd_event_wait(e, timeout);
3506
3507 if (e->profile_delays)
3508 e->last_run = now(CLOCK_MONOTONIC);
3509
3510 if (r > 0) {
3511 /* There's something now, so let's dispatch it */
3512 r = sd_event_dispatch(e);
3513 if (r < 0)
3514 return r;
3515
3516 return 1;
3517 }
3518
3519 return r;
3520 }
3521
3522 _public_ int sd_event_loop(sd_event *e) {
3523 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3524 int r;
3525
3526 assert_return(e, -EINVAL);
3527 assert_return(e = event_resolve(e), -ENOPKG);
3528 assert_return(!event_pid_changed(e), -ECHILD);
3529 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3530
3531 ref = sd_event_ref(e);
3532
3533 while (e->state != SD_EVENT_FINISHED) {
3534 r = sd_event_run(e, (uint64_t) -1);
3535 if (r < 0)
3536 return r;
3537 }
3538
3539 return e->exit_code;
3540 }
3541
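/*
 * Illustrative end-to-end sketch (callback and variable names are
 * hypothetical): run the default event loop until a callback asks it to exit:
 *
 *         static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *                 return sd_event_exit(sd_event_source_get_event(s), 0);
 *         }
 *
 *         _cleanup_(sd_event_unrefp) sd_event *e = NULL;
 *         sd_event_source *s = NULL;
 *         uint64_t now_usec;
 *         int r;
 *
 *         r = sd_event_default(&e);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_add_time(e, &s, CLOCK_MONOTONIC, now_usec + USEC_PER_SEC, 0, on_timer, NULL);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_loop(e);
 */
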
3542 _public_ int sd_event_get_fd(sd_event *e) {
3543
3544 assert_return(e, -EINVAL);
3545 assert_return(e = event_resolve(e), -ENOPKG);
3546 assert_return(!event_pid_changed(e), -ECHILD);
3547
3548 return e->epoll_fd;
3549 }
3550
3551 _public_ int sd_event_get_state(sd_event *e) {
3552 assert_return(e, -EINVAL);
3553 assert_return(e = event_resolve(e), -ENOPKG);
3554 assert_return(!event_pid_changed(e), -ECHILD);
3555
3556 return e->state;
3557 }
3558
3559 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3560 assert_return(e, -EINVAL);
3561 assert_return(e = event_resolve(e), -ENOPKG);
3562 assert_return(code, -EINVAL);
3563 assert_return(!event_pid_changed(e), -ECHILD);
3564
3565 if (!e->exit_requested)
3566 return -ENODATA;
3567
3568 *code = e->exit_code;
3569 return 0;
3570 }
3571
3572 _public_ int sd_event_exit(sd_event *e, int code) {
3573 assert_return(e, -EINVAL);
3574 assert_return(e = event_resolve(e), -ENOPKG);
3575 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3576 assert_return(!event_pid_changed(e), -ECHILD);
3577
3578 e->exit_requested = true;
3579 e->exit_code = code;
3580
3581 return 0;
3582 }
3583
3584 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3585 assert_return(e, -EINVAL);
3586 assert_return(e = event_resolve(e), -ENOPKG);
3587 assert_return(usec, -EINVAL);
3588 assert_return(!event_pid_changed(e), -ECHILD);
3589
3590 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3591 return -EOPNOTSUPP;
3592
3593 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3594 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3595 * the purpose of getting the time this doesn't matter. */
3596 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3597 return -EOPNOTSUPP;
3598
3599 if (!triple_timestamp_is_set(&e->timestamp)) {
3600 /* Implicitly fall back to now() if we never ran
3601 * before and thus have no cached time. */
3602 *usec = now(clock);
3603 return 1;
3604 }
3605
3606 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3607 return 0;
3608 }
3609
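/*
 * Illustrative caller-side sketch: the return value tells whether the
 * timestamp is the cached one of the current loop iteration or was sampled
 * freshly because the loop has not run yet:
 *
 *         uint64_t t;
 *
 *         r = sd_event_now(e, CLOCK_MONOTONIC, &t);
 *         if (r < 0)
 *                 return r;
 *         if (r > 0)
 *                 log_debug("Event loop not started yet, using current time.");
 */
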
3610 _public_ int sd_event_default(sd_event **ret) {
3611 sd_event *e = NULL;
3612 int r;
3613
3614 if (!ret)
3615 return !!default_event;
3616
3617 if (default_event) {
3618 *ret = sd_event_ref(default_event);
3619 return 0;
3620 }
3621
3622 r = sd_event_new(&e);
3623 if (r < 0)
3624 return r;
3625
3626 e->default_event_ptr = &default_event;
3627 e->tid = gettid();
3628 default_event = e;
3629
3630 *ret = e;
3631 return 1;
3632 }
3633
3634 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3635 assert_return(e, -EINVAL);
3636 assert_return(e = event_resolve(e), -ENOPKG);
3637 assert_return(tid, -EINVAL);
3638 assert_return(!event_pid_changed(e), -ECHILD);
3639
3640 if (e->tid != 0) {
3641 *tid = e->tid;
3642 return 0;
3643 }
3644
3645 return -ENXIO;
3646 }
3647
3648 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3649 int r;
3650
3651 assert_return(e, -EINVAL);
3652 assert_return(e = event_resolve(e), -ENOPKG);
3653 assert_return(!event_pid_changed(e), -ECHILD);
3654
3655 if (e->watchdog == !!b)
3656 return e->watchdog;
3657
3658 if (b) {
3659 struct epoll_event ev;
3660
3661 r = sd_watchdog_enabled(false, &e->watchdog_period);
3662 if (r <= 0)
3663 return r;
3664
3665 /* Issue first ping immediately */
3666 sd_notify(false, "WATCHDOG=1");
3667 e->watchdog_last = now(CLOCK_MONOTONIC);
3668
3669 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3670 if (e->watchdog_fd < 0)
3671 return -errno;
3672
3673 r = arm_watchdog(e);
3674 if (r < 0)
3675 goto fail;
3676
3677 ev = (struct epoll_event) {
3678 .events = EPOLLIN,
3679 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3680 };
3681
3682 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3683 if (r < 0) {
3684 r = -errno;
3685 goto fail;
3686 }
3687
3688 } else {
3689 if (e->watchdog_fd >= 0) {
3690 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3691 e->watchdog_fd = safe_close(e->watchdog_fd);
3692 }
3693 }
3694
3695 e->watchdog = !!b;
3696 return e->watchdog;
3697
3698 fail:
3699 e->watchdog_fd = safe_close(e->watchdog_fd);
3700 return r;
3701 }
3702
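/*
 * Illustrative usage sketch: a service started with WatchdogSec= set only
 * needs to enable this once; afterwards the event loop pings the service
 * manager automatically from process_watchdog() above:
 *
 *         r = sd_event_set_watchdog(e, true);
 *         if (r < 0)
 *                 return r;
 *         if (r == 0)
 *                 log_debug("Watchdog not enabled for this service.");
 */
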
3703 _public_ int sd_event_get_watchdog(sd_event *e) {
3704 assert_return(e, -EINVAL);
3705 assert_return(e = event_resolve(e), -ENOPKG);
3706 assert_return(!event_pid_changed(e), -ECHILD);
3707
3708 return e->watchdog;
3709 }
3710
3711 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3712 assert_return(e, -EINVAL);
3713 assert_return(e = event_resolve(e), -ENOPKG);
3714 assert_return(!event_pid_changed(e), -ECHILD);
3715
3716 *ret = e->iteration;
3717 return 0;
3718 }
3719
3720 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3721 assert_return(s, -EINVAL);
3722
3723 s->destroy_callback = callback;
3724 return 0;
3725 }
3726
3727 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3728 assert_return(s, -EINVAL);
3729
3730 if (ret)
3731 *ret = s->destroy_callback;
3732
3733 return !!s->destroy_callback;
3734 }