src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "fd-util.h"
13 #include "fs-util.h"
14 #include "hashmap.h"
15 #include "list.h"
16 #include "macro.h"
17 #include "missing.h"
18 #include "prioq.h"
19 #include "process-util.h"
20 #include "set.h"
21 #include "signal-util.h"
22 #include "string-table.h"
23 #include "string-util.h"
24 #include "time-util.h"
25 #include "util.h"
26
27 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
28
29 typedef enum EventSourceType {
30 SOURCE_IO,
31 SOURCE_TIME_REALTIME,
32 SOURCE_TIME_BOOTTIME,
33 SOURCE_TIME_MONOTONIC,
34 SOURCE_TIME_REALTIME_ALARM,
35 SOURCE_TIME_BOOTTIME_ALARM,
36 SOURCE_SIGNAL,
37 SOURCE_CHILD,
38 SOURCE_DEFER,
39 SOURCE_POST,
40 SOURCE_EXIT,
41 SOURCE_WATCHDOG,
42 SOURCE_INOTIFY,
43 _SOURCE_EVENT_SOURCE_TYPE_MAX,
44 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
45 } EventSourceType;
46
47 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
48 [SOURCE_IO] = "io",
49 [SOURCE_TIME_REALTIME] = "realtime",
50 [SOURCE_TIME_BOOTTIME] = "boottime",
51 [SOURCE_TIME_MONOTONIC] = "monotonic",
52 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
53 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
54 [SOURCE_SIGNAL] = "signal",
55 [SOURCE_CHILD] = "child",
56 [SOURCE_DEFER] = "defer",
57 [SOURCE_POST] = "post",
58 [SOURCE_EXIT] = "exit",
59 [SOURCE_WATCHDOG] = "watchdog",
60 [SOURCE_INOTIFY] = "inotify",
61 };
62
63 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
64
65 /* All objects we use in epoll events start with this value, so that
66 * we know how to dispatch them (see the sketch after this enum) */
67 typedef enum WakeupType {
68 WAKEUP_NONE,
69 WAKEUP_EVENT_SOURCE,
70 WAKEUP_CLOCK_DATA,
71 WAKEUP_SIGNAL_DATA,
72 WAKEUP_INOTIFY_DATA,
73 _WAKEUP_TYPE_MAX,
74 _WAKEUP_TYPE_INVALID = -1,
75 } WakeupType;
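/* A minimal sketch of how this convention is consumed when an epoll event fires; the
 * real dispatch happens further down, in sd_event_wait(), this is only illustrative:
 *
 *     struct epoll_event ev;                        // as returned by epoll_wait()
 *     switch (*(WakeupType*) ev.data.ptr) {
 *     case WAKEUP_EVENT_SOURCE: break;              // an sd_event_source (IO)
 *     case WAKEUP_CLOCK_DATA:   break;              // a struct clock_data
 *     case WAKEUP_SIGNAL_DATA:  break;              // a struct signal_data
 *     case WAKEUP_INOTIFY_DATA: break;              // a struct inotify_data
 *     default: ;
 *     }
 */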
76
77 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
78
79 struct inode_data;
80
81 struct sd_event_source {
82 WakeupType wakeup;
83
84 unsigned n_ref;
85
86 sd_event *event;
87 void *userdata;
88 sd_event_handler_t prepare;
89
90 char *description;
91
92 EventSourceType type:5;
93 signed int enabled:3;
94 bool pending:1;
95 bool dispatching:1;
96 bool floating:1;
97
98 int64_t priority;
99 unsigned pending_index;
100 unsigned prepare_index;
101 uint64_t pending_iteration;
102 uint64_t prepare_iteration;
103
104 sd_event_destroy_t destroy_callback;
105
106 LIST_FIELDS(sd_event_source, sources);
107
108 union {
109 struct {
110 sd_event_io_handler_t callback;
111 int fd;
112 uint32_t events;
113 uint32_t revents;
114 bool registered:1;
115 bool owned:1;
116 } io;
117 struct {
118 sd_event_time_handler_t callback;
119 usec_t next, accuracy;
120 unsigned earliest_index;
121 unsigned latest_index;
122 } time;
123 struct {
124 sd_event_signal_handler_t callback;
125 struct signalfd_siginfo siginfo;
126 int sig;
127 } signal;
128 struct {
129 sd_event_child_handler_t callback;
130 siginfo_t siginfo;
131 pid_t pid;
132 int options;
133 } child;
134 struct {
135 sd_event_handler_t callback;
136 } defer;
137 struct {
138 sd_event_handler_t callback;
139 } post;
140 struct {
141 sd_event_handler_t callback;
142 unsigned prioq_index;
143 } exit;
144 struct {
145 sd_event_inotify_handler_t callback;
146 uint32_t mask;
147 struct inode_data *inode_data;
148 LIST_FIELDS(sd_event_source, by_inode_data);
149 } inotify;
150 };
151 };
152
153 struct clock_data {
154 WakeupType wakeup;
155 int fd;
156
157 /* For all clocks we maintain two priority queues each: one
158 * ordered by the earliest time the events may be
159 * dispatched, and one ordered by the latest time they must
160 * have been dispatched by. The range between the top entries in
161 * the two prioqs is the time window we can freely schedule
162 * wakeups in (see the sketch after this struct). */
163
164 Prioq *earliest;
165 Prioq *latest;
166 usec_t next;
167
168 bool needs_rearm:1;
169 };
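/* A rough sketch (illustrative names, no error handling) of how the two queues yield
 * the scheduling window mentioned above; the real logic lives in event_arm_timer():
 *
 *     sd_event_source *a = prioq_peek(d->earliest); // smallest .time.next
 *     sd_event_source *b = prioq_peek(d->latest);   // smallest .time.next + .time.accuracy
 *     usec_t lo = a->time.next;
 *     usec_t hi = usec_add(b->time.next, b->time.accuracy);
 *     // any wakeup time t with lo <= t <= hi satisfies every armed timer on this
 *     // clock, which is what makes coalescing (see initialize_perturb() below) possible
 */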
170
171 struct signal_data {
172 WakeupType wakeup;
173
174 /* For each priority we maintain one signal fd, so that we
175 * only have to dequeue a single event per priority at a
176 * time. */
177
178 int fd;
179 int64_t priority;
180 sigset_t sigset;
181 sd_event_source *current;
182 };
183
184 /* A structure listing all event sources currently watching a specific inode */
185 struct inode_data {
186 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
187 ino_t ino;
188 dev_t dev;
189
190 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that the
191 * priority can still be changed until then: changing the priority means adding a watch descriptor to
192 * the inotify object of the new priority, and that is only possible while we still have a handle to the
193 * original inode. We keep a list of all inode_data objects with an open fd in the to_close list (see
194 * below) of the sd-event object, so that it is efficient to close them all before entering the next
195 * event loop iteration. */
196 int fd;
197
198 /* The inotify "watch descriptor" */
199 int wd;
200
201 /* The combined mask of all inotify watches on this inode that we manage. This is also the mask that has
202 * most recently been set on the watch descriptor. */
203 uint32_t combined_mask;
204
205 /* All event sources subscribed to this inode */
206 LIST_HEAD(sd_event_source, event_sources);
207
208 /* The inotify object we watch this inode with */
209 struct inotify_data *inotify_data;
210
211 /* A linked list of all inode data objects with fds to close (see above) */
212 LIST_FIELDS(struct inode_data, to_close);
213 };
214
215 /* A structure encapsulating an inotify fd */
216 struct inotify_data {
217 WakeupType wakeup;
218
219 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
220 * a time */
221
222 int fd;
223 int64_t priority;
224
225 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
226 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
227
228 /* The buffer we read inotify events into */
229 union inotify_event_buffer buffer;
230 size_t buffer_filled; /* fill level of the buffer */
231
232 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
233 * inotify fd as long as there are still pending events on the inotify (because we have no strategy of queuing
234 * the events locally if they can't be coalesced). */
235 unsigned n_pending;
236
237 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
238 * to make it efficient to figure out what inotify objects to process data on next. */
239 LIST_FIELDS(struct inotify_data, buffered);
240 };
241
242 struct sd_event {
243 unsigned n_ref;
244
245 int epoll_fd;
246 int watchdog_fd;
247
248 Prioq *pending;
249 Prioq *prepare;
250
251 /* timerfd_create() only supports these five clocks so far. We
252 * can add support for more clocks when the kernel learns to
253 * deal with them, too. */
254 struct clock_data realtime;
255 struct clock_data boottime;
256 struct clock_data monotonic;
257 struct clock_data realtime_alarm;
258 struct clock_data boottime_alarm;
259
260 usec_t perturb;
261
262 sd_event_source **signal_sources; /* indexed by signal number */
263 Hashmap *signal_data; /* indexed by priority */
264
265 Hashmap *child_sources;
266 unsigned n_enabled_child_sources;
267
268 Set *post_sources;
269
270 Prioq *exit;
271
272 Hashmap *inotify_data; /* indexed by priority */
273
274 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
275 LIST_HEAD(struct inode_data, inode_data_to_close);
276
277 /* A list of inotify objects that already have events buffered which aren't processed yet */
278 LIST_HEAD(struct inotify_data, inotify_data_buffered);
279
280 pid_t original_pid;
281
282 uint64_t iteration;
283 triple_timestamp timestamp;
284 int state;
285
286 bool exit_requested:1;
287 bool need_process_child:1;
288 bool watchdog:1;
289 bool profile_delays:1;
290
291 int exit_code;
292
293 pid_t tid;
294 sd_event **default_event_ptr;
295
296 usec_t watchdog_last, watchdog_period;
297
298 unsigned n_sources;
299
300 LIST_HEAD(sd_event_source, sources);
301
302 usec_t last_run, last_log;
303 unsigned delays[sizeof(usec_t) * 8];
304 };
305
306 static thread_local sd_event *default_event = NULL;
307
308 static void source_disconnect(sd_event_source *s);
309 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
310
311 static sd_event *event_resolve(sd_event *e) {
312 return e == SD_EVENT_DEFAULT ? default_event : e;
313 }
314
315 static int pending_prioq_compare(const void *a, const void *b) {
316 const sd_event_source *x = a, *y = b;
317 int r;
318
319 assert(x->pending);
320 assert(y->pending);
321
322 /* Enabled ones first */
323 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
324 return -1;
325 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
326 return 1;
327
328 /* Lower priority values first */
329 r = CMP(x->priority, y->priority);
330 if (r != 0)
331 return r;
332
333 /* Older entries first */
334 return CMP(x->pending_iteration, y->pending_iteration);
335 }
336
337 static int prepare_prioq_compare(const void *a, const void *b) {
338 const sd_event_source *x = a, *y = b;
339 int r;
340
341 assert(x->prepare);
342 assert(y->prepare);
343
344 /* Enabled ones first */
345 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
346 return -1;
347 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
348 return 1;
349
350 /* Move most recently prepared ones last, so that we can stop
351 * preparing as soon as we hit one that has already been
352 * prepared in the current iteration */
353 r = CMP(x->prepare_iteration, y->prepare_iteration);
354 if (r != 0)
355 return r;
356
357 /* Lower priority values first */
358 return CMP(x->priority, y->priority);
359 }
360
361 static int earliest_time_prioq_compare(const void *a, const void *b) {
362 const sd_event_source *x = a, *y = b;
363
364 assert(EVENT_SOURCE_IS_TIME(x->type));
365 assert(x->type == y->type);
366
367 /* Enabled ones first */
368 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
369 return -1;
370 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
371 return 1;
372
373 /* Move the pending ones to the end */
374 if (!x->pending && y->pending)
375 return -1;
376 if (x->pending && !y->pending)
377 return 1;
378
379 /* Order by time */
380 return CMP(x->time.next, y->time.next);
381 }
382
383 static usec_t time_event_source_latest(const sd_event_source *s) {
384 return usec_add(s->time.next, s->time.accuracy);
385 }
386
387 static int latest_time_prioq_compare(const void *a, const void *b) {
388 const sd_event_source *x = a, *y = b;
389
390 assert(EVENT_SOURCE_IS_TIME(x->type));
391 assert(x->type == y->type);
392
393 /* Enabled ones first */
394 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
395 return -1;
396 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
397 return 1;
398
399 /* Move the pending ones to the end */
400 if (!x->pending && y->pending)
401 return -1;
402 if (x->pending && !y->pending)
403 return 1;
404
405 /* Order by time */
406 return CMP(time_event_source_latest(x), time_event_source_latest(y));
407 }
408
409 static int exit_prioq_compare(const void *a, const void *b) {
410 const sd_event_source *x = a, *y = b;
411
412 assert(x->type == SOURCE_EXIT);
413 assert(y->type == SOURCE_EXIT);
414
415 /* Enabled ones first */
416 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
417 return -1;
418 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
419 return 1;
420
421 /* Lower priority values first */
422 if (x->priority < y->priority)
423 return -1;
424 if (x->priority > y->priority)
425 return 1;
426
427 return 0;
428 }
429
430 static void free_clock_data(struct clock_data *d) {
431 assert(d);
432 assert(d->wakeup == WAKEUP_CLOCK_DATA);
433
434 safe_close(d->fd);
435 prioq_free(d->earliest);
436 prioq_free(d->latest);
437 }
438
439 static sd_event *event_free(sd_event *e) {
440 sd_event_source *s;
441
442 assert(e);
443
444 while ((s = e->sources)) {
445 assert(s->floating);
446 source_disconnect(s);
447 sd_event_source_unref(s);
448 }
449
450 assert(e->n_sources == 0);
451
452 if (e->default_event_ptr)
453 *(e->default_event_ptr) = NULL;
454
455 safe_close(e->epoll_fd);
456 safe_close(e->watchdog_fd);
457
458 free_clock_data(&e->realtime);
459 free_clock_data(&e->boottime);
460 free_clock_data(&e->monotonic);
461 free_clock_data(&e->realtime_alarm);
462 free_clock_data(&e->boottime_alarm);
463
464 prioq_free(e->pending);
465 prioq_free(e->prepare);
466 prioq_free(e->exit);
467
468 free(e->signal_sources);
469 hashmap_free(e->signal_data);
470
471 hashmap_free(e->inotify_data);
472
473 hashmap_free(e->child_sources);
474 set_free(e->post_sources);
475
476 return mfree(e);
477 }
478
479 _public_ int sd_event_new(sd_event** ret) {
480 sd_event *e;
481 int r;
482
483 assert_return(ret, -EINVAL);
484
485 e = new(sd_event, 1);
486 if (!e)
487 return -ENOMEM;
488
489 *e = (sd_event) {
490 .n_ref = 1,
491 .epoll_fd = -1,
492 .watchdog_fd = -1,
493 .realtime.wakeup = WAKEUP_CLOCK_DATA,
494 .realtime.fd = -1,
495 .realtime.next = USEC_INFINITY,
496 .boottime.wakeup = WAKEUP_CLOCK_DATA,
497 .boottime.fd = -1,
498 .boottime.next = USEC_INFINITY,
499 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
500 .monotonic.fd = -1,
501 .monotonic.next = USEC_INFINITY,
502 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
503 .realtime_alarm.fd = -1,
504 .realtime_alarm.next = USEC_INFINITY,
505 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
506 .boottime_alarm.fd = -1,
507 .boottime_alarm.next = USEC_INFINITY,
508 .perturb = USEC_INFINITY,
509 .original_pid = getpid_cached(),
510 };
511
512 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
513 if (r < 0)
514 goto fail;
515
516 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
517 if (e->epoll_fd < 0) {
518 r = -errno;
519 goto fail;
520 }
521
522 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
523
524 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
525 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
526 e->profile_delays = true;
527 }
528
529 *ret = e;
530 return 0;
531
532 fail:
533 event_free(e);
534 return r;
535 }
536
537 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
538
539 static bool event_pid_changed(sd_event *e) {
540 assert(e);
541
542 /* We don't support people creating an event loop and keeping
543 * it around over a fork(). Let's complain. */
544
545 return e->original_pid != getpid_cached();
546 }
547
548 static void source_io_unregister(sd_event_source *s) {
549 int r;
550
551 assert(s);
552 assert(s->type == SOURCE_IO);
553
554 if (event_pid_changed(s->event))
555 return;
556
557 if (!s->io.registered)
558 return;
559
560 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
561 if (r < 0)
562 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
563 strna(s->description), event_source_type_to_string(s->type));
564
565 s->io.registered = false;
566 }
567
568 static int source_io_register(
569 sd_event_source *s,
570 int enabled,
571 uint32_t events) {
572
573 struct epoll_event ev;
574 int r;
575
576 assert(s);
577 assert(s->type == SOURCE_IO);
578 assert(enabled != SD_EVENT_OFF);
579
580 ev = (struct epoll_event) {
581 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
582 .data.ptr = s,
583 };
584
585 if (s->io.registered)
586 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
587 else
588 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
589 if (r < 0)
590 return -errno;
591
592 s->io.registered = true;
593
594 return 0;
595 }
596
597 static clockid_t event_source_type_to_clock(EventSourceType t) {
598
599 switch (t) {
600
601 case SOURCE_TIME_REALTIME:
602 return CLOCK_REALTIME;
603
604 case SOURCE_TIME_BOOTTIME:
605 return CLOCK_BOOTTIME;
606
607 case SOURCE_TIME_MONOTONIC:
608 return CLOCK_MONOTONIC;
609
610 case SOURCE_TIME_REALTIME_ALARM:
611 return CLOCK_REALTIME_ALARM;
612
613 case SOURCE_TIME_BOOTTIME_ALARM:
614 return CLOCK_BOOTTIME_ALARM;
615
616 default:
617 return (clockid_t) -1;
618 }
619 }
620
621 static EventSourceType clock_to_event_source_type(clockid_t clock) {
622
623 switch (clock) {
624
625 case CLOCK_REALTIME:
626 return SOURCE_TIME_REALTIME;
627
628 case CLOCK_BOOTTIME:
629 return SOURCE_TIME_BOOTTIME;
630
631 case CLOCK_MONOTONIC:
632 return SOURCE_TIME_MONOTONIC;
633
634 case CLOCK_REALTIME_ALARM:
635 return SOURCE_TIME_REALTIME_ALARM;
636
637 case CLOCK_BOOTTIME_ALARM:
638 return SOURCE_TIME_BOOTTIME_ALARM;
639
640 default:
641 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
642 }
643 }
644
645 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
646 assert(e);
647
648 switch (t) {
649
650 case SOURCE_TIME_REALTIME:
651 return &e->realtime;
652
653 case SOURCE_TIME_BOOTTIME:
654 return &e->boottime;
655
656 case SOURCE_TIME_MONOTONIC:
657 return &e->monotonic;
658
659 case SOURCE_TIME_REALTIME_ALARM:
660 return &e->realtime_alarm;
661
662 case SOURCE_TIME_BOOTTIME_ALARM:
663 return &e->boottime_alarm;
664
665 default:
666 return NULL;
667 }
668 }
669
670 static int event_make_signal_data(
671 sd_event *e,
672 int sig,
673 struct signal_data **ret) {
674
675 struct epoll_event ev;
676 struct signal_data *d;
677 bool added = false;
678 sigset_t ss_copy;
679 int64_t priority;
680 int r;
681
682 assert(e);
683
684 if (event_pid_changed(e))
685 return -ECHILD;
686
687 if (e->signal_sources && e->signal_sources[sig])
688 priority = e->signal_sources[sig]->priority;
689 else
690 priority = SD_EVENT_PRIORITY_NORMAL;
691
692 d = hashmap_get(e->signal_data, &priority);
693 if (d) {
694 if (sigismember(&d->sigset, sig) > 0) {
695 if (ret)
696 *ret = d;
697 return 0;
698 }
699 } else {
700 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
701 if (r < 0)
702 return r;
703
704 d = new(struct signal_data, 1);
705 if (!d)
706 return -ENOMEM;
707
708 *d = (struct signal_data) {
709 .wakeup = WAKEUP_SIGNAL_DATA,
710 .fd = -1,
711 .priority = priority,
712 };
713
714 r = hashmap_put(e->signal_data, &d->priority, d);
715 if (r < 0) {
716 free(d);
717 return r;
718 }
719
720 added = true;
721 }
722
723 ss_copy = d->sigset;
724 assert_se(sigaddset(&ss_copy, sig) >= 0);
725
726 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
727 if (r < 0) {
728 r = -errno;
729 goto fail;
730 }
731
732 d->sigset = ss_copy;
733
734 if (d->fd >= 0) {
735 if (ret)
736 *ret = d;
737 return 0;
738 }
739
740 d->fd = fd_move_above_stdio(r);
741
742 ev = (struct epoll_event) {
743 .events = EPOLLIN,
744 .data.ptr = d,
745 };
746
747 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
748 if (r < 0) {
749 r = -errno;
750 goto fail;
751 }
752
753 if (ret)
754 *ret = d;
755
756 return 0;
757
758 fail:
759 if (added) {
760 d->fd = safe_close(d->fd);
761 hashmap_remove(e->signal_data, &d->priority);
762 free(d);
763 }
764
765 return r;
766 }
767
768 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
769 assert(e);
770 assert(d);
771
772 /* Turns off the specified signal in the signal data
773 * object. If the signal mask of the object becomes empty that
774 * way, the object is removed as well. */
775
776 if (sigismember(&d->sigset, sig) == 0)
777 return;
778
779 assert_se(sigdelset(&d->sigset, sig) >= 0);
780
781 if (sigisemptyset(&d->sigset)) {
782
783 /* If the mask is now all-zero we can get rid of the structure */
784 hashmap_remove(e->signal_data, &d->priority);
785 safe_close(d->fd);
786 free(d);
787 return;
788 }
789
790 assert(d->fd >= 0);
791
792 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
793 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
794 }
795
796 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
797 struct signal_data *d;
798 static const int64_t zero_priority = 0;
799
800 assert(e);
801
802 /* Rechecks if the specified signal is still something we are
803 * interested in. If not, we'll unmask it, and possibly drop
804 * the signalfd for it. */
805
806 if (sig == SIGCHLD &&
807 e->n_enabled_child_sources > 0)
808 return;
809
810 if (e->signal_sources &&
811 e->signal_sources[sig] &&
812 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
813 return;
814
815 /*
816 * The specified signal might be enabled in three different queues:
817 *
818 * 1) the one that belongs to the priority passed (if it is non-NULL)
819 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
820 * 3) the 0 priority (to cover the SIGCHLD case)
821 *
822 * Hence, let's remove it from all three here.
823 */
824
825 if (priority) {
826 d = hashmap_get(e->signal_data, priority);
827 if (d)
828 event_unmask_signal_data(e, d, sig);
829 }
830
831 if (e->signal_sources && e->signal_sources[sig]) {
832 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
833 if (d)
834 event_unmask_signal_data(e, d, sig);
835 }
836
837 d = hashmap_get(e->signal_data, &zero_priority);
838 if (d)
839 event_unmask_signal_data(e, d, sig);
840 }
841
842 static void source_disconnect(sd_event_source *s) {
843 sd_event *event;
844
845 assert(s);
846
847 if (!s->event)
848 return;
849
850 assert(s->event->n_sources > 0);
851
852 switch (s->type) {
853
854 case SOURCE_IO:
855 if (s->io.fd >= 0)
856 source_io_unregister(s);
857
858 break;
859
860 case SOURCE_TIME_REALTIME:
861 case SOURCE_TIME_BOOTTIME:
862 case SOURCE_TIME_MONOTONIC:
863 case SOURCE_TIME_REALTIME_ALARM:
864 case SOURCE_TIME_BOOTTIME_ALARM: {
865 struct clock_data *d;
866
867 d = event_get_clock_data(s->event, s->type);
868 assert(d);
869
870 prioq_remove(d->earliest, s, &s->time.earliest_index);
871 prioq_remove(d->latest, s, &s->time.latest_index);
872 d->needs_rearm = true;
873 break;
874 }
875
876 case SOURCE_SIGNAL:
877 if (s->signal.sig > 0) {
878
879 if (s->event->signal_sources)
880 s->event->signal_sources[s->signal.sig] = NULL;
881
882 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
883 }
884
885 break;
886
887 case SOURCE_CHILD:
888 if (s->child.pid > 0) {
889 if (s->enabled != SD_EVENT_OFF) {
890 assert(s->event->n_enabled_child_sources > 0);
891 s->event->n_enabled_child_sources--;
892 }
893
894 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
895 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
896 }
897
898 break;
899
900 case SOURCE_DEFER:
901 /* nothing */
902 break;
903
904 case SOURCE_POST:
905 set_remove(s->event->post_sources, s);
906 break;
907
908 case SOURCE_EXIT:
909 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
910 break;
911
912 case SOURCE_INOTIFY: {
913 struct inode_data *inode_data;
914
915 inode_data = s->inotify.inode_data;
916 if (inode_data) {
917 struct inotify_data *inotify_data;
918 assert_se(inotify_data = inode_data->inotify_data);
919
920 /* Detach this event source from the inode object */
921 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
922 s->inotify.inode_data = NULL;
923
924 if (s->pending) {
925 assert(inotify_data->n_pending > 0);
926 inotify_data->n_pending--;
927 }
928
929 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
930 * continues to be watched. That's because inotify doesn't really have an API for that: we
931 * can only change watch masks with access to the original inode either by fd or by path. But
932 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
933 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
934 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
935 * there), but given the need for open_by_handle_at() which is privileged and not universally
936 * available this would be quite an incomplete solution. Hence we go the other way, leave the
937 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
938 * anymore after reception. Yes, this sucks, but … Linux … */
939
940 /* Maybe release the inode data (and its inotify) */
941 event_gc_inode_data(s->event, inode_data);
942 }
943
944 break;
945 }
946
947 default:
948 assert_not_reached("Wut? I shouldn't exist.");
949 }
950
951 if (s->pending)
952 prioq_remove(s->event->pending, s, &s->pending_index);
953
954 if (s->prepare)
955 prioq_remove(s->event->prepare, s, &s->prepare_index);
956
957 event = s->event;
958
959 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
960 s->event = NULL;
961 LIST_REMOVE(sources, event->sources, s);
962 event->n_sources--;
963
964 if (!s->floating)
965 sd_event_unref(event);
966 }
967
968 static void source_free(sd_event_source *s) {
969 assert(s);
970
971 source_disconnect(s);
972
973 if (s->type == SOURCE_IO && s->io.owned)
974 s->io.fd = safe_close(s->io.fd);
975
976 if (s->destroy_callback)
977 s->destroy_callback(s->userdata);
978
979 free(s->description);
980 free(s);
981 }
982 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
983
984 static int source_set_pending(sd_event_source *s, bool b) {
985 int r;
986
987 assert(s);
988 assert(s->type != SOURCE_EXIT);
989
990 if (s->pending == b)
991 return 0;
992
993 s->pending = b;
994
995 if (b) {
996 s->pending_iteration = s->event->iteration;
997
998 r = prioq_put(s->event->pending, s, &s->pending_index);
999 if (r < 0) {
1000 s->pending = false;
1001 return r;
1002 }
1003 } else
1004 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1005
1006 if (EVENT_SOURCE_IS_TIME(s->type)) {
1007 struct clock_data *d;
1008
1009 d = event_get_clock_data(s->event, s->type);
1010 assert(d);
1011
1012 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1013 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1014 d->needs_rearm = true;
1015 }
1016
1017 if (s->type == SOURCE_SIGNAL && !b) {
1018 struct signal_data *d;
1019
1020 d = hashmap_get(s->event->signal_data, &s->priority);
1021 if (d && d->current == s)
1022 d->current = NULL;
1023 }
1024
1025 if (s->type == SOURCE_INOTIFY) {
1026
1027 assert(s->inotify.inode_data);
1028 assert(s->inotify.inode_data->inotify_data);
1029
1030 if (b)
1031 s->inotify.inode_data->inotify_data->n_pending++;
1032 else {
1033 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1034 s->inotify.inode_data->inotify_data->n_pending--;
1035 }
1036 }
1037
1038 return 0;
1039 }
1040
1041 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1042 sd_event_source *s;
1043
1044 assert(e);
1045
1046 s = new(sd_event_source, 1);
1047 if (!s)
1048 return NULL;
1049
1050 *s = (struct sd_event_source) {
1051 .n_ref = 1,
1052 .event = e,
1053 .floating = floating,
1054 .type = type,
1055 .pending_index = PRIOQ_IDX_NULL,
1056 .prepare_index = PRIOQ_IDX_NULL,
1057 };
1058
1059 if (!floating)
1060 sd_event_ref(e);
1061
1062 LIST_PREPEND(sources, e->sources, s);
1063 e->n_sources++;
1064
1065 return s;
1066 }
1067
1068 _public_ int sd_event_add_io(
1069 sd_event *e,
1070 sd_event_source **ret,
1071 int fd,
1072 uint32_t events,
1073 sd_event_io_handler_t callback,
1074 void *userdata) {
1075
1076 _cleanup_(source_freep) sd_event_source *s = NULL;
1077 int r;
1078
1079 assert_return(e, -EINVAL);
1080 assert_return(e = event_resolve(e), -ENOPKG);
1081 assert_return(fd >= 0, -EBADF);
1082 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1083 assert_return(callback, -EINVAL);
1084 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1085 assert_return(!event_pid_changed(e), -ECHILD);
1086
1087 s = source_new(e, !ret, SOURCE_IO);
1088 if (!s)
1089 return -ENOMEM;
1090
1091 s->wakeup = WAKEUP_EVENT_SOURCE;
1092 s->io.fd = fd;
1093 s->io.events = events;
1094 s->io.callback = callback;
1095 s->userdata = userdata;
1096 s->enabled = SD_EVENT_ON;
1097
1098 r = source_io_register(s, s->enabled, events);
1099 if (r < 0)
1100 return r;
1101
1102 if (ret)
1103 *ret = s;
1104 TAKE_PTR(s);
1105
1106 return 0;
1107 }
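/* Illustrative use of this call (hypothetical handler and fd names, error handling
 * trimmed); not part of the library itself:
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             (void) read(fd, buf, sizeof(buf));
 *             return 0;
 *     }
 *
 *     sd_event *e = NULL;
 *     (void) sd_event_default(&e);
 *     (void) sd_event_add_io(e, NULL, some_fd, EPOLLIN, on_readable, NULL);
 *     (void) sd_event_loop(e);
 */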
1108
1109 static void initialize_perturb(sd_event *e) {
1110 sd_id128_t bootid = {};
1111
1112 /* When we sleep for longer, we try to realign the wakeup to
1113 the same time within each minute/second/250ms, so that
1114 events all across the system can be coalesced into a single
1115 CPU wakeup. However, let's take some system-specific
1116 randomness for this value, so that in a network of systems
1117 with synced clocks timer events are distributed a
1118 bit. Here, we calculate a perturbation usec offset from the
1119 boot ID. */
1120
1121 if (_likely_(e->perturb != USEC_INFINITY))
1122 return;
1123
1124 if (sd_id128_get_boot(&bootid) >= 0)
1125 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1126 }
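/* Roughly how the offset is applied later (the real window-picking logic lives in
 * sleep_between() further down; this is only a sketch): instead of firing exactly at a
 * deadline, the wakeup in the window [a, b] is aligned to a full minute (or second, or
 * 250ms) plus the per-boot perturbation:
 *
 *     usec_t c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
 *     if (c >= b)
 *             c -= USEC_PER_MINUTE;
 *     if (c >= a)
 *             return c;
 *     // ...otherwise retry with second and then 250ms granularity
 *
 * so that timers on one machine coalesce, while machines with different boot IDs stay
 * spread out. */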
1127
1128 static int event_setup_timer_fd(
1129 sd_event *e,
1130 struct clock_data *d,
1131 clockid_t clock) {
1132
1133 struct epoll_event ev;
1134 int r, fd;
1135
1136 assert(e);
1137 assert(d);
1138
1139 if (_likely_(d->fd >= 0))
1140 return 0;
1141
1142 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1143 if (fd < 0)
1144 return -errno;
1145
1146 fd = fd_move_above_stdio(fd);
1147
1148 ev = (struct epoll_event) {
1149 .events = EPOLLIN,
1150 .data.ptr = d,
1151 };
1152
1153 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1154 if (r < 0) {
1155 safe_close(fd);
1156 return -errno;
1157 }
1158
1159 d->fd = fd;
1160 return 0;
1161 }
1162
1163 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1164 assert(s);
1165
1166 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1167 }
1168
1169 _public_ int sd_event_add_time(
1170 sd_event *e,
1171 sd_event_source **ret,
1172 clockid_t clock,
1173 uint64_t usec,
1174 uint64_t accuracy,
1175 sd_event_time_handler_t callback,
1176 void *userdata) {
1177
1178 EventSourceType type;
1179 _cleanup_(source_freep) sd_event_source *s = NULL;
1180 struct clock_data *d;
1181 int r;
1182
1183 assert_return(e, -EINVAL);
1184 assert_return(e = event_resolve(e), -ENOPKG);
1185 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1186 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1187 assert_return(!event_pid_changed(e), -ECHILD);
1188
1189 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1190 return -EOPNOTSUPP;
1191
1192 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1193 if (type < 0)
1194 return -EOPNOTSUPP;
1195
1196 if (!callback)
1197 callback = time_exit_callback;
1198
1199 d = event_get_clock_data(e, type);
1200 assert(d);
1201
1202 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1203 if (r < 0)
1204 return r;
1205
1206 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1207 if (r < 0)
1208 return r;
1209
1210 if (d->fd < 0) {
1211 r = event_setup_timer_fd(e, d, clock);
1212 if (r < 0)
1213 return r;
1214 }
1215
1216 s = source_new(e, !ret, type);
1217 if (!s)
1218 return -ENOMEM;
1219
1220 s->time.next = usec;
1221 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1222 s->time.callback = callback;
1223 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1224 s->userdata = userdata;
1225 s->enabled = SD_EVENT_ONESHOT;
1226
1227 d->needs_rearm = true;
1228
1229 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1230 if (r < 0)
1231 return r;
1232
1233 r = prioq_put(d->latest, s, &s->time.latest_index);
1234 if (r < 0)
1235 return r;
1236
1237 if (ret)
1238 *ret = s;
1239 TAKE_PTR(s);
1240
1241 return 0;
1242 }
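/* Illustrative use, arming a one-shot timer 5s from now (hypothetical handler name,
 * error handling trimmed); not part of the library itself:
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             log_info("timer elapsed");
 *             return 0;
 *     }
 *
 *     uint64_t t;
 *     (void) sd_event_now(e, CLOCK_MONOTONIC, &t);
 *     (void) sd_event_add_time(e, NULL, CLOCK_MONOTONIC, t + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 */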
1243
1244 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1245 assert(s);
1246
1247 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1248 }
1249
1250 _public_ int sd_event_add_signal(
1251 sd_event *e,
1252 sd_event_source **ret,
1253 int sig,
1254 sd_event_signal_handler_t callback,
1255 void *userdata) {
1256
1257 _cleanup_(source_freep) sd_event_source *s = NULL;
1258 struct signal_data *d;
1259 sigset_t ss;
1260 int r;
1261
1262 assert_return(e, -EINVAL);
1263 assert_return(e = event_resolve(e), -ENOPKG);
1264 assert_return(SIGNAL_VALID(sig), -EINVAL);
1265 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1266 assert_return(!event_pid_changed(e), -ECHILD);
1267
1268 if (!callback)
1269 callback = signal_exit_callback;
1270
1271 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1272 if (r != 0)
1273 return -r;
1274
1275 if (!sigismember(&ss, sig))
1276 return -EBUSY;
1277
1278 if (!e->signal_sources) {
1279 e->signal_sources = new0(sd_event_source*, _NSIG);
1280 if (!e->signal_sources)
1281 return -ENOMEM;
1282 } else if (e->signal_sources[sig])
1283 return -EBUSY;
1284
1285 s = source_new(e, !ret, SOURCE_SIGNAL);
1286 if (!s)
1287 return -ENOMEM;
1288
1289 s->signal.sig = sig;
1290 s->signal.callback = callback;
1291 s->userdata = userdata;
1292 s->enabled = SD_EVENT_ON;
1293
1294 e->signal_sources[sig] = s;
1295
1296 r = event_make_signal_data(e, sig, &d);
1297 if (r < 0)
1298 return r;
1299
1300 /* Use the signal name as description for the event source by default */
1301 (void) sd_event_source_set_description(s, signal_to_string(sig));
1302
1303 if (ret)
1304 *ret = s;
1305 TAKE_PTR(s);
1306
1307 return 0;
1308 }
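/* Illustrative use; note that the signal must already be blocked in the calling thread,
 * otherwise -EBUSY is returned above (hypothetical handler name, error handling trimmed):
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     (void) pthread_sigmask(SIG_BLOCK, &mask, NULL);
 *     (void) sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 */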
1309
1310 _public_ int sd_event_add_child(
1311 sd_event *e,
1312 sd_event_source **ret,
1313 pid_t pid,
1314 int options,
1315 sd_event_child_handler_t callback,
1316 void *userdata) {
1317
1318 _cleanup_(source_freep) sd_event_source *s = NULL;
1319 int r;
1320
1321 assert_return(e, -EINVAL);
1322 assert_return(e = event_resolve(e), -ENOPKG);
1323 assert_return(pid > 1, -EINVAL);
1324 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1325 assert_return(options != 0, -EINVAL);
1326 assert_return(callback, -EINVAL);
1327 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1328 assert_return(!event_pid_changed(e), -ECHILD);
1329
1330 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1331 if (r < 0)
1332 return r;
1333
1334 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1335 return -EBUSY;
1336
1337 s = source_new(e, !ret, SOURCE_CHILD);
1338 if (!s)
1339 return -ENOMEM;
1340
1341 s->child.pid = pid;
1342 s->child.options = options;
1343 s->child.callback = callback;
1344 s->userdata = userdata;
1345 s->enabled = SD_EVENT_ONESHOT;
1346
1347 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1348 if (r < 0)
1349 return r;
1350
1351 e->n_enabled_child_sources++;
1352
1353 r = event_make_signal_data(e, SIGCHLD, NULL);
1354 if (r < 0) {
1355 e->n_enabled_child_sources--;
1356 return r;
1357 }
1358
1359 e->need_process_child = true;
1360
1361 if (ret)
1362 *ret = s;
1363 TAKE_PTR(s);
1364
1365 return 0;
1366 }
1367
1368 _public_ int sd_event_add_defer(
1369 sd_event *e,
1370 sd_event_source **ret,
1371 sd_event_handler_t callback,
1372 void *userdata) {
1373
1374 _cleanup_(source_freep) sd_event_source *s = NULL;
1375 int r;
1376
1377 assert_return(e, -EINVAL);
1378 assert_return(e = event_resolve(e), -ENOPKG);
1379 assert_return(callback, -EINVAL);
1380 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1381 assert_return(!event_pid_changed(e), -ECHILD);
1382
1383 s = source_new(e, !ret, SOURCE_DEFER);
1384 if (!s)
1385 return -ENOMEM;
1386
1387 s->defer.callback = callback;
1388 s->userdata = userdata;
1389 s->enabled = SD_EVENT_ONESHOT;
1390
1391 r = source_set_pending(s, true);
1392 if (r < 0)
1393 return r;
1394
1395 if (ret)
1396 *ret = s;
1397 TAKE_PTR(s);
1398
1399 return 0;
1400 }
1401
1402 _public_ int sd_event_add_post(
1403 sd_event *e,
1404 sd_event_source **ret,
1405 sd_event_handler_t callback,
1406 void *userdata) {
1407
1408 _cleanup_(source_freep) sd_event_source *s = NULL;
1409 int r;
1410
1411 assert_return(e, -EINVAL);
1412 assert_return(e = event_resolve(e), -ENOPKG);
1413 assert_return(callback, -EINVAL);
1414 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1415 assert_return(!event_pid_changed(e), -ECHILD);
1416
1417 r = set_ensure_allocated(&e->post_sources, NULL);
1418 if (r < 0)
1419 return r;
1420
1421 s = source_new(e, !ret, SOURCE_POST);
1422 if (!s)
1423 return -ENOMEM;
1424
1425 s->post.callback = callback;
1426 s->userdata = userdata;
1427 s->enabled = SD_EVENT_ON;
1428
1429 r = set_put(e->post_sources, s);
1430 if (r < 0)
1431 return r;
1432
1433 if (ret)
1434 *ret = s;
1435 TAKE_PTR(s);
1436
1437 return 0;
1438 }
1439
1440 _public_ int sd_event_add_exit(
1441 sd_event *e,
1442 sd_event_source **ret,
1443 sd_event_handler_t callback,
1444 void *userdata) {
1445
1446 _cleanup_(source_freep) sd_event_source *s = NULL;
1447 int r;
1448
1449 assert_return(e, -EINVAL);
1450 assert_return(e = event_resolve(e), -ENOPKG);
1451 assert_return(callback, -EINVAL);
1452 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1453 assert_return(!event_pid_changed(e), -ECHILD);
1454
1455 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1456 if (r < 0)
1457 return r;
1458
1459 s = source_new(e, !ret, SOURCE_EXIT);
1460 if (!s)
1461 return -ENOMEM;
1462
1463 s->exit.callback = callback;
1464 s->userdata = userdata;
1465 s->exit.prioq_index = PRIOQ_IDX_NULL;
1466 s->enabled = SD_EVENT_ONESHOT;
1467
1468 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1469 if (r < 0)
1470 return r;
1471
1472 if (ret)
1473 *ret = s;
1474 TAKE_PTR(s);
1475
1476 return 0;
1477 }
1478
1479 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1480 assert(e);
1481
1482 if (!d)
1483 return;
1484
1485 assert(hashmap_isempty(d->inodes));
1486 assert(hashmap_isempty(d->wd));
1487
1488 if (d->buffer_filled > 0)
1489 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1490
1491 hashmap_free(d->inodes);
1492 hashmap_free(d->wd);
1493
1494 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1495
1496 if (d->fd >= 0) {
1497 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1498 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1499
1500 safe_close(d->fd);
1501 }
1502 free(d);
1503 }
1504
1505 static int event_make_inotify_data(
1506 sd_event *e,
1507 int64_t priority,
1508 struct inotify_data **ret) {
1509
1510 _cleanup_close_ int fd = -1;
1511 struct inotify_data *d;
1512 struct epoll_event ev;
1513 int r;
1514
1515 assert(e);
1516
1517 d = hashmap_get(e->inotify_data, &priority);
1518 if (d) {
1519 if (ret)
1520 *ret = d;
1521 return 0;
1522 }
1523
1524 fd = inotify_init1(IN_NONBLOCK|O_CLOEXEC);
1525 if (fd < 0)
1526 return -errno;
1527
1528 fd = fd_move_above_stdio(fd);
1529
1530 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1531 if (r < 0)
1532 return r;
1533
1534 d = new(struct inotify_data, 1);
1535 if (!d)
1536 return -ENOMEM;
1537
1538 *d = (struct inotify_data) {
1539 .wakeup = WAKEUP_INOTIFY_DATA,
1540 .fd = TAKE_FD(fd),
1541 .priority = priority,
1542 };
1543
1544 r = hashmap_put(e->inotify_data, &d->priority, d);
1545 if (r < 0) {
1546 d->fd = safe_close(d->fd);
1547 free(d);
1548 return r;
1549 }
1550
1551 ev = (struct epoll_event) {
1552 .events = EPOLLIN,
1553 .data.ptr = d,
1554 };
1555
1556 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1557 r = -errno;
1558 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1559 * remove the fd from the epoll first, which we don't want as we couldn't
1560 * add it in the first place. */
1561 event_free_inotify_data(e, d);
1562 return r;
1563 }
1564
1565 if (ret)
1566 *ret = d;
1567
1568 return 1;
1569 }
1570
1571 static int inode_data_compare(const void *a, const void *b) {
1572 const struct inode_data *x = a, *y = b;
1573
1574 assert(x);
1575 assert(y);
1576
1577 if (x->dev < y->dev)
1578 return -1;
1579 if (x->dev > y->dev)
1580 return 1;
1581
1582 if (x->ino < y->ino)
1583 return -1;
1584 if (x->ino > y->ino)
1585 return 1;
1586
1587 return 0;
1588 }
1589
1590 static void inode_data_hash_func(const void *p, struct siphash *state) {
1591 const struct inode_data *d = p;
1592
1593 assert(p);
1594
1595 siphash24_compress(&d->dev, sizeof(d->dev), state);
1596 siphash24_compress(&d->ino, sizeof(d->ino), state);
1597 }
1598
1599 const struct hash_ops inode_data_hash_ops = {
1600 .hash = inode_data_hash_func,
1601 .compare = inode_data_compare
1602 };
1603
1604 static void event_free_inode_data(
1605 sd_event *e,
1606 struct inode_data *d) {
1607
1608 assert(e);
1609
1610 if (!d)
1611 return;
1612
1613 assert(!d->event_sources);
1614
1615 if (d->fd >= 0) {
1616 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1617 safe_close(d->fd);
1618 }
1619
1620 if (d->inotify_data) {
1621
1622 if (d->wd >= 0) {
1623 if (d->inotify_data->fd >= 0) {
1624 /* So here's a problem. At the time this runs the watch descriptor might already be
1625 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1626 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1627 * likely case to happen. */
1628
1629 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1630 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1631 }
1632
1633 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1634 }
1635
1636 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1637 }
1638
1639 free(d);
1640 }
1641
1642 static void event_gc_inode_data(
1643 sd_event *e,
1644 struct inode_data *d) {
1645
1646 struct inotify_data *inotify_data;
1647
1648 assert(e);
1649
1650 if (!d)
1651 return;
1652
1653 if (d->event_sources)
1654 return;
1655
1656 inotify_data = d->inotify_data;
1657 event_free_inode_data(e, d);
1658
1659 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1660 event_free_inotify_data(e, inotify_data);
1661 }
1662
1663 static int event_make_inode_data(
1664 sd_event *e,
1665 struct inotify_data *inotify_data,
1666 dev_t dev,
1667 ino_t ino,
1668 struct inode_data **ret) {
1669
1670 struct inode_data *d, key;
1671 int r;
1672
1673 assert(e);
1674 assert(inotify_data);
1675
1676 key = (struct inode_data) {
1677 .ino = ino,
1678 .dev = dev,
1679 };
1680
1681 d = hashmap_get(inotify_data->inodes, &key);
1682 if (d) {
1683 if (ret)
1684 *ret = d;
1685
1686 return 0;
1687 }
1688
1689 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1690 if (r < 0)
1691 return r;
1692
1693 d = new(struct inode_data, 1);
1694 if (!d)
1695 return -ENOMEM;
1696
1697 *d = (struct inode_data) {
1698 .dev = dev,
1699 .ino = ino,
1700 .wd = -1,
1701 .fd = -1,
1702 .inotify_data = inotify_data,
1703 };
1704
1705 r = hashmap_put(inotify_data->inodes, d, d);
1706 if (r < 0) {
1707 free(d);
1708 return r;
1709 }
1710
1711 if (ret)
1712 *ret = d;
1713
1714 return 1;
1715 }
1716
1717 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1718 bool excl_unlink = true;
1719 uint32_t combined = 0;
1720 sd_event_source *s;
1721
1722 assert(d);
1723
1724 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1725 * the IN_EXCL_UNLINK flag is ANDed instead.
1726 *
1727 * Note that we add all sources to the mask here, regardless of whether enabled, disabled or oneshot. That's
1728 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1729 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1730 * events we don't care for client-side. */
1731
1732 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1733
1734 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1735 excl_unlink = false;
1736
1737 combined |= s->inotify.mask;
1738 }
1739
1740 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1741 }
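/* Worked example (illustrative): with one source asking for IN_CREATE|IN_EXCL_UNLINK and
 * another asking for IN_DELETE (without IN_EXCL_UNLINK), the combined mask handed to the
 * kernel is IN_CREATE|IN_DELETE: IN_EXCL_UNLINK is dropped because not all sources
 * requested it, and the per-source flags (IN_ONESHOT, IN_DONT_FOLLOW, IN_ONLYDIR) are
 * never propagated to the shared watch. */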
1742
1743 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1744 uint32_t combined_mask;
1745 int wd, r;
1746
1747 assert(d);
1748 assert(d->fd >= 0);
1749
1750 combined_mask = inode_data_determine_mask(d);
1751
1752 if (d->wd >= 0 && combined_mask == d->combined_mask)
1753 return 0;
1754
1755 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1756 if (r < 0)
1757 return r;
1758
1759 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1760 if (wd < 0)
1761 return -errno;
1762
1763 if (d->wd < 0) {
1764 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1765 if (r < 0) {
1766 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1767 return r;
1768 }
1769
1770 d->wd = wd;
1771
1772 } else if (d->wd != wd) {
1773
1774 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1775 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1776 return -EINVAL;
1777 }
1778
1779 d->combined_mask = combined_mask;
1780 return 1;
1781 }
1782
1783 _public_ int sd_event_add_inotify(
1784 sd_event *e,
1785 sd_event_source **ret,
1786 const char *path,
1787 uint32_t mask,
1788 sd_event_inotify_handler_t callback,
1789 void *userdata) {
1790
1791 struct inotify_data *inotify_data = NULL;
1792 struct inode_data *inode_data = NULL;
1793 _cleanup_close_ int fd = -1;
1794 _cleanup_(source_freep) sd_event_source *s = NULL;
1795 struct stat st;
1796 int r;
1797
1798 assert_return(e, -EINVAL);
1799 assert_return(e = event_resolve(e), -ENOPKG);
1800 assert_return(path, -EINVAL);
1801 assert_return(callback, -EINVAL);
1802 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1803 assert_return(!event_pid_changed(e), -ECHILD);
1804
1805 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1806 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1807 * the flag can't be passed in by the user. */
1808 if (mask & IN_MASK_ADD)
1809 return -EINVAL;
1810
1811 fd = open(path, O_PATH|O_CLOEXEC|
1812 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1813 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1814 if (fd < 0)
1815 return -errno;
1816
1817 if (fstat(fd, &st) < 0)
1818 return -errno;
1819
1820 s = source_new(e, !ret, SOURCE_INOTIFY);
1821 if (!s)
1822 return -ENOMEM;
1823
1824 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1825 s->inotify.mask = mask;
1826 s->inotify.callback = callback;
1827 s->userdata = userdata;
1828
1829 /* Allocate an inotify object for this priority, and an inode object within it */
1830 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1831 if (r < 0)
1832 return r;
1833
1834 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1835 if (r < 0) {
1836 event_free_inotify_data(e, inotify_data);
1837 return r;
1838 }
1839
1840 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1841 * the event source, until then, for which we need the original inode. */
1842 if (inode_data->fd < 0) {
1843 inode_data->fd = TAKE_FD(fd);
1844 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1845 }
1846
1847 /* Link our event source to the inode data object */
1848 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1849 s->inotify.inode_data = inode_data;
1850
1851 /* Actually realize the watch now */
1852 r = inode_data_realize_watch(e, inode_data);
1853 if (r < 0)
1854 return r;
1855
1856 (void) sd_event_source_set_description(s, path);
1857
1858 if (ret)
1859 *ret = s;
1860 TAKE_PTR(s);
1861
1862 return 0;
1863 }
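/* Illustrative use, watching a directory for new entries (hypothetical path and handler
 * names, error handling trimmed); not part of the library itself:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->len > 0)
 *                     log_debug("event 0x%x on %s", ev->mask, ev->name);
 *             return 0;
 *     }
 *
 *     (void) sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 */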
1864
1865 static sd_event_source* event_source_free(sd_event_source *s) {
1866 if (!s)
1867 return NULL;
1868
1869 /* Here's a special hack: when we are called from a
1870 * dispatch handler we won't free the event source
1871 * immediately, but we will detach the fd from the
1872 * epoll. This way it is safe for the caller to unref
1873 * the event source and immediately close the fd, but
1874 * we still retain a valid event source object after
1875 * the callback. */
1876
1877 if (s->dispatching) {
1878 if (s->type == SOURCE_IO)
1879 source_io_unregister(s);
1880
1881 source_disconnect(s);
1882 } else
1883 source_free(s);
1884
1885 return NULL;
1886 }
1887
1888 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1889
1890 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1891 assert_return(s, -EINVAL);
1892 assert_return(!event_pid_changed(s->event), -ECHILD);
1893
1894 return free_and_strdup(&s->description, description);
1895 }
1896
1897 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1898 assert_return(s, -EINVAL);
1899 assert_return(description, -EINVAL);
1900 assert_return(s->description, -ENXIO);
1901 assert_return(!event_pid_changed(s->event), -ECHILD);
1902
1903 *description = s->description;
1904 return 0;
1905 }
1906
1907 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1908 assert_return(s, NULL);
1909
1910 return s->event;
1911 }
1912
1913 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1914 assert_return(s, -EINVAL);
1915 assert_return(s->type != SOURCE_EXIT, -EDOM);
1916 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1917 assert_return(!event_pid_changed(s->event), -ECHILD);
1918
1919 return s->pending;
1920 }
1921
1922 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1923 assert_return(s, -EINVAL);
1924 assert_return(s->type == SOURCE_IO, -EDOM);
1925 assert_return(!event_pid_changed(s->event), -ECHILD);
1926
1927 return s->io.fd;
1928 }
1929
1930 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1931 int r;
1932
1933 assert_return(s, -EINVAL);
1934 assert_return(fd >= 0, -EBADF);
1935 assert_return(s->type == SOURCE_IO, -EDOM);
1936 assert_return(!event_pid_changed(s->event), -ECHILD);
1937
1938 if (s->io.fd == fd)
1939 return 0;
1940
1941 if (s->enabled == SD_EVENT_OFF) {
1942 s->io.fd = fd;
1943 s->io.registered = false;
1944 } else {
1945 int saved_fd;
1946
1947 saved_fd = s->io.fd;
1948 assert(s->io.registered);
1949
1950 s->io.fd = fd;
1951 s->io.registered = false;
1952
1953 r = source_io_register(s, s->enabled, s->io.events);
1954 if (r < 0) {
1955 s->io.fd = saved_fd;
1956 s->io.registered = true;
1957 return r;
1958 }
1959
1960 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1961 }
1962
1963 return 0;
1964 }
1965
1966 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1967 assert_return(s, -EINVAL);
1968 assert_return(s->type == SOURCE_IO, -EDOM);
1969
1970 return s->io.owned;
1971 }
1972
1973 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1974 assert_return(s, -EINVAL);
1975 assert_return(s->type == SOURCE_IO, -EDOM);
1976
1977 s->io.owned = own;
1978 return 0;
1979 }
1980
1981 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1982 assert_return(s, -EINVAL);
1983 assert_return(events, -EINVAL);
1984 assert_return(s->type == SOURCE_IO, -EDOM);
1985 assert_return(!event_pid_changed(s->event), -ECHILD);
1986
1987 *events = s->io.events;
1988 return 0;
1989 }
1990
1991 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1992 int r;
1993
1994 assert_return(s, -EINVAL);
1995 assert_return(s->type == SOURCE_IO, -EDOM);
1996 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1997 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1998 assert_return(!event_pid_changed(s->event), -ECHILD);
1999
2000 /* edge-triggered updates are never skipped, so we can reset edges */
2001 if (s->io.events == events && !(events & EPOLLET))
2002 return 0;
2003
2004 r = source_set_pending(s, false);
2005 if (r < 0)
2006 return r;
2007
2008 if (s->enabled != SD_EVENT_OFF) {
2009 r = source_io_register(s, s->enabled, events);
2010 if (r < 0)
2011 return r;
2012 }
2013
2014 s->io.events = events;
2015
2016 return 0;
2017 }
2018
2019 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2020 assert_return(s, -EINVAL);
2021 assert_return(revents, -EINVAL);
2022 assert_return(s->type == SOURCE_IO, -EDOM);
2023 assert_return(s->pending, -ENODATA);
2024 assert_return(!event_pid_changed(s->event), -ECHILD);
2025
2026 *revents = s->io.revents;
2027 return 0;
2028 }
2029
2030 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2031 assert_return(s, -EINVAL);
2032 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2033 assert_return(!event_pid_changed(s->event), -ECHILD);
2034
2035 return s->signal.sig;
2036 }
2037
2038 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2039 assert_return(s, -EINVAL);
2040 assert_return(!event_pid_changed(s->event), -ECHILD);
2041
2042 *priority = s->priority;
2043 return 0;
2044 }
2045
2046 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2047 bool rm_inotify = false, rm_inode = false;
2048 struct inotify_data *new_inotify_data = NULL;
2049 struct inode_data *new_inode_data = NULL;
2050 int r;
2051
2052 assert_return(s, -EINVAL);
2053 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2054 assert_return(!event_pid_changed(s->event), -ECHILD);
2055
2056 if (s->priority == priority)
2057 return 0;
2058
2059 if (s->type == SOURCE_INOTIFY) {
2060 struct inode_data *old_inode_data;
2061
2062 assert(s->inotify.inode_data);
2063 old_inode_data = s->inotify.inode_data;
2064
2065 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2066 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2067 * events we allow priority changes only until the first following iteration. */
2068 if (old_inode_data->fd < 0)
2069 return -EOPNOTSUPP;
2070
2071 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2072 if (r < 0)
2073 return r;
2074 rm_inotify = r > 0;
2075
2076 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2077 if (r < 0)
2078 goto fail;
2079 rm_inode = r > 0;
2080
2081 if (new_inode_data->fd < 0) {
2082 /* Duplicate the fd for the new inode object if we don't have any yet */
2083 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2084 if (new_inode_data->fd < 0) {
2085 r = -errno;
2086 goto fail;
2087 }
2088
2089 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2090 }
2091
2092 /* Move the event source to the new inode data structure */
2093 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2094 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2095 s->inotify.inode_data = new_inode_data;
2096
2097 /* Now create the new watch */
2098 r = inode_data_realize_watch(s->event, new_inode_data);
2099 if (r < 0) {
2100 /* Move it back */
2101 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2102 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2103 s->inotify.inode_data = old_inode_data;
2104 goto fail;
2105 }
2106
2107 s->priority = priority;
2108
2109 event_gc_inode_data(s->event, old_inode_data);
2110
2111 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2112 struct signal_data *old, *d;
2113
2114 /* Move us from the signalfd belonging to the old
2115 * priority to the signalfd of the new priority */
2116
2117 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2118
2119 s->priority = priority;
2120
2121 r = event_make_signal_data(s->event, s->signal.sig, &d);
2122 if (r < 0) {
2123 s->priority = old->priority;
2124 return r;
2125 }
2126
2127 event_unmask_signal_data(s->event, old, s->signal.sig);
2128 } else
2129 s->priority = priority;
2130
2131 if (s->pending)
2132 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2133
2134 if (s->prepare)
2135 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2136
2137 if (s->type == SOURCE_EXIT)
2138 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2139
2140 return 0;
2141
2142 fail:
2143 if (rm_inode)
2144 event_free_inode_data(s->event, new_inode_data);
2145
2146 if (rm_inotify)
2147 event_free_inotify_data(s->event, new_inotify_data);
2148
2149 return r;
2150 }
2151
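/* Usage sketch (illustrative only, not part of this file): adjusting the priority of an
 * existing event source from application code. "s" is assumed to come from an earlier
 * sd_event_add_*() call; as noted above, for inotify sources this only works until the
 * first event loop iteration after the source was created.
 *
 *         int deprioritize_source(sd_event_source *s) {
 *                 int r;
 *
 *                 r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
 *                 if (r == -EOPNOTSUPP)
 *                         return 0;  // inotify source whose inode fd was already closed
 *                 return r;
 *         }
 */
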
2152 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2153 assert_return(s, -EINVAL);
2154 assert_return(m, -EINVAL);
2155 assert_return(!event_pid_changed(s->event), -ECHILD);
2156
2157 *m = s->enabled;
2158 return 0;
2159 }
2160
2161 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2162 int r;
2163
2164 assert_return(s, -EINVAL);
2165 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2166 assert_return(!event_pid_changed(s->event), -ECHILD);
2167
2168 /* If we are dead anyway, we are fine with turning off
2169 * sources, but everything else needs to fail. */
2170 if (s->event->state == SD_EVENT_FINISHED)
2171 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2172
2173 if (s->enabled == m)
2174 return 0;
2175
2176 if (m == SD_EVENT_OFF) {
2177
2178 /* Unset the pending flag when this event source is disabled */
2179 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2180 r = source_set_pending(s, false);
2181 if (r < 0)
2182 return r;
2183 }
2184
2185 switch (s->type) {
2186
2187 case SOURCE_IO:
2188 source_io_unregister(s);
2189 s->enabled = m;
2190 break;
2191
2192 case SOURCE_TIME_REALTIME:
2193 case SOURCE_TIME_BOOTTIME:
2194 case SOURCE_TIME_MONOTONIC:
2195 case SOURCE_TIME_REALTIME_ALARM:
2196 case SOURCE_TIME_BOOTTIME_ALARM: {
2197 struct clock_data *d;
2198
2199 s->enabled = m;
2200 d = event_get_clock_data(s->event, s->type);
2201 assert(d);
2202
2203 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2204 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2205 d->needs_rearm = true;
2206 break;
2207 }
2208
2209 case SOURCE_SIGNAL:
2210 s->enabled = m;
2211
2212 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2213 break;
2214
2215 case SOURCE_CHILD:
2216 s->enabled = m;
2217
2218 assert(s->event->n_enabled_child_sources > 0);
2219 s->event->n_enabled_child_sources--;
2220
2221 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2222 break;
2223
2224 case SOURCE_EXIT:
2225 s->enabled = m;
2226 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2227 break;
2228
2229 case SOURCE_DEFER:
2230 case SOURCE_POST:
2231 case SOURCE_INOTIFY:
2232 s->enabled = m;
2233 break;
2234
2235 default:
2236 assert_not_reached("Wut? I shouldn't exist.");
2237 }
2238
2239 } else {
2240
2241 /* Unset the pending flag when this event source is enabled */
2242 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2243 r = source_set_pending(s, false);
2244 if (r < 0)
2245 return r;
2246 }
2247
2248 switch (s->type) {
2249
2250 case SOURCE_IO:
2251 r = source_io_register(s, m, s->io.events);
2252 if (r < 0)
2253 return r;
2254
2255 s->enabled = m;
2256 break;
2257
2258 case SOURCE_TIME_REALTIME:
2259 case SOURCE_TIME_BOOTTIME:
2260 case SOURCE_TIME_MONOTONIC:
2261 case SOURCE_TIME_REALTIME_ALARM:
2262 case SOURCE_TIME_BOOTTIME_ALARM: {
2263 struct clock_data *d;
2264
2265 s->enabled = m;
2266 d = event_get_clock_data(s->event, s->type);
2267 assert(d);
2268
2269 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2270 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2271 d->needs_rearm = true;
2272 break;
2273 }
2274
2275 case SOURCE_SIGNAL:
2276
2277 s->enabled = m;
2278
2279 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2280 if (r < 0) {
2281 s->enabled = SD_EVENT_OFF;
2282 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2283 return r;
2284 }
2285
2286 break;
2287
2288 case SOURCE_CHILD:
2289
2290 if (s->enabled == SD_EVENT_OFF)
2291 s->event->n_enabled_child_sources++;
2292
2293 s->enabled = m;
2294
2295 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2296 if (r < 0) {
2297 s->enabled = SD_EVENT_OFF;
2298 s->event->n_enabled_child_sources--;
2299 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2300 return r;
2301 }
2302
2303 break;
2304
2305 case SOURCE_EXIT:
2306 s->enabled = m;
2307 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2308 break;
2309
2310 case SOURCE_DEFER:
2311 case SOURCE_POST:
2312 case SOURCE_INOTIFY:
2313 s->enabled = m;
2314 break;
2315
2316 default:
2317 assert_not_reached("Wut? I shouldn't exist.");
2318 }
2319 }
2320
2321 if (s->pending)
2322 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2323
2324 if (s->prepare)
2325 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2326
2327 return 0;
2328 }
2329
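/* Usage sketch (illustrative only): pausing an I/O source and later re-enabling it as
 * one-shot, so that it fires once and is then disabled again automatically. "s" is assumed
 * to have been created earlier with sd_event_add_io().
 *
 *         r = sd_event_source_set_enabled(s, SD_EVENT_OFF);      // unregister the fd from epoll
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);  // fire once, then switch off again
 *         if (r < 0)
 *                 return r;
 */
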
2330 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2331 assert_return(s, -EINVAL);
2332 assert_return(usec, -EINVAL);
2333 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2334 assert_return(!event_pid_changed(s->event), -ECHILD);
2335
2336 *usec = s->time.next;
2337 return 0;
2338 }
2339
2340 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2341 struct clock_data *d;
2342 int r;
2343
2344 assert_return(s, -EINVAL);
2345 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2346 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2347 assert_return(!event_pid_changed(s->event), -ECHILD);
2348
2349 r = source_set_pending(s, false);
2350 if (r < 0)
2351 return r;
2352
2353 s->time.next = usec;
2354
2355 d = event_get_clock_data(s->event, s->type);
2356 assert(d);
2357
2358 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2359 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2360 d->needs_rearm = true;
2361
2362 return 0;
2363 }
2364
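/* Usage sketch (illustrative only): re-arming an existing CLOCK_MONOTONIC timer source to
 * fire five seconds from now. sd_event_now() returns the timestamp cached at the start of
 * the current iteration where available; "e" and "s" are assumed to exist in the caller.
 *
 *         int rearm_in_five_seconds(sd_event *e, sd_event_source *s) {
 *                 uint64_t now_usec;
 *                 int r;
 *
 *                 r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *                 if (r < 0)
 *                         return r;
 *
 *                 r = sd_event_source_set_time(s, now_usec + 5 * USEC_PER_SEC);
 *                 if (r < 0)
 *                         return r;
 *
 *                 // Timer sources default to SD_EVENT_ONESHOT and are disabled after they
 *                 // fire, so re-enable the source when re-arming it.
 *                 return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *         }
 */
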
2365 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2366 assert_return(s, -EINVAL);
2367 assert_return(usec, -EINVAL);
2368 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2369 assert_return(!event_pid_changed(s->event), -ECHILD);
2370
2371 *usec = s->time.accuracy;
2372 return 0;
2373 }
2374
2375 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2376 struct clock_data *d;
2377 int r;
2378
2379 assert_return(s, -EINVAL);
2380 assert_return(usec != (uint64_t) -1, -EINVAL);
2381 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2382 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2383 assert_return(!event_pid_changed(s->event), -ECHILD);
2384
2385 r = source_set_pending(s, false);
2386 if (r < 0)
2387 return r;
2388
2389 if (usec == 0)
2390 usec = DEFAULT_ACCURACY_USEC;
2391
2392 s->time.accuracy = usec;
2393
2394 d = event_get_clock_data(s->event, s->type);
2395 assert(d);
2396
2397 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2398 d->needs_rearm = true;
2399
2400 return 0;
2401 }
2402
2403 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2404 assert_return(s, -EINVAL);
2405 assert_return(clock, -EINVAL);
2406 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2407 assert_return(!event_pid_changed(s->event), -ECHILD);
2408
2409 *clock = event_source_type_to_clock(s->type);
2410 return 0;
2411 }
2412
2413 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2414 assert_return(s, -EINVAL);
2415 assert_return(pid, -EINVAL);
2416 assert_return(s->type == SOURCE_CHILD, -EDOM);
2417 assert_return(!event_pid_changed(s->event), -ECHILD);
2418
2419 *pid = s->child.pid;
2420 return 0;
2421 }
2422
2423 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2424 assert_return(s, -EINVAL);
2425 assert_return(mask, -EINVAL);
2426 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2427 assert_return(!event_pid_changed(s->event), -ECHILD);
2428
2429 *mask = s->inotify.mask;
2430 return 0;
2431 }
2432
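/* Usage sketch (illustrative only): watching a directory for newly created files via the
 * high-level inotify API and reading the effective mask back. The path and the handler name
 * are hypothetical application code.
 *
 *         static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *                 // ev->name holds the file name relative to the watched directory
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_inotify(e, &s, "/run/example", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 *         if (r < 0)
 *                 return r;
 *
 *         uint32_t mask;
 *         r = sd_event_source_get_inotify_mask(s, &mask);
 */
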
2433 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2434 int r;
2435
2436 assert_return(s, -EINVAL);
2437 assert_return(s->type != SOURCE_EXIT, -EDOM);
2438 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2439 assert_return(!event_pid_changed(s->event), -ECHILD);
2440
2441 if (s->prepare == callback)
2442 return 0;
2443
2444 if (callback && s->prepare) {
2445 s->prepare = callback;
2446 return 0;
2447 }
2448
2449 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2450 if (r < 0)
2451 return r;
2452
2453 s->prepare = callback;
2454
2455 if (callback) {
2456 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2457 if (r < 0)
2458 return r;
2459 } else
2460 prioq_remove(s->event->prepare, s, &s->prepare_index);
2461
2462 return 0;
2463 }
2464
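/* Usage sketch (illustrative only): a prepare callback runs just before the loop goes to
 * sleep, which makes it a good place to toggle a source based on application state. Here an
 * I/O source is kept enabled only while a hypothetical output buffer is non-empty; the
 * "Connection" struct and "io_source" variable are assumed application code.
 *
 *         static int prepare_io(sd_event_source *s, void *userdata) {
 *                 Connection *c = userdata;  // hypothetical application struct
 *
 *                 return sd_event_source_set_enabled(s, c->outbuf_size > 0 ? SD_EVENT_ON : SD_EVENT_OFF);
 *         }
 *
 *         r = sd_event_source_set_prepare(io_source, prepare_io);
 *         if (r < 0)
 *                 return r;
 */
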
2465 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2466 assert_return(s, NULL);
2467
2468 return s->userdata;
2469 }
2470
2471 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2472 void *ret;
2473
2474 assert_return(s, NULL);
2475
2476 ret = s->userdata;
2477 s->userdata = userdata;
2478
2479 return ret;
2480 }
2481
2482 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2483 usec_t c;
2484 assert(e);
2485 assert(a <= b);
2486
2487 if (a <= 0)
2488 return 0;
2489 if (a >= USEC_INFINITY)
2490 return USEC_INFINITY;
2491
2492 if (b <= a + 1)
2493 return a;
2494
2495 initialize_perturb(e);
2496
2497 /*
2498 Find a good time to wake up again between times a and b. We
2499 have two goals here:
2500
2501 a) We want to wake up as seldom as possible, hence prefer
2502 later times over earlier times.
2503
2504 b) But if we have to wake up, then let's make sure to
2505 dispatch as much as possible on the entire system.
2506
2507 We implement this by waking up everywhere at the same time
2508 within any given minute if we can, synchronised via the
2509 perturbation value determined from the boot ID. If we can't,
2510 then we try to find the same spot within every 10s, then
2511 every 1s and then every 250ms interval. Otherwise, we pick
2512 the last possible time to wake up.
2513 */
2514
2515 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2516 if (c >= b) {
2517 if (_unlikely_(c < USEC_PER_MINUTE))
2518 return b;
2519
2520 c -= USEC_PER_MINUTE;
2521 }
2522
2523 if (c >= a)
2524 return c;
2525
2526 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2527 if (c >= b) {
2528 if (_unlikely_(c < USEC_PER_SEC*10))
2529 return b;
2530
2531 c -= USEC_PER_SEC*10;
2532 }
2533
2534 if (c >= a)
2535 return c;
2536
2537 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2538 if (c >= b) {
2539 if (_unlikely_(c < USEC_PER_SEC))
2540 return b;
2541
2542 c -= USEC_PER_SEC;
2543 }
2544
2545 if (c >= a)
2546 return c;
2547
2548 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2549 if (c >= b) {
2550 if (_unlikely_(c < USEC_PER_MSEC*250))
2551 return b;
2552
2553 c -= USEC_PER_MSEC*250;
2554 }
2555
2556 if (c >= a)
2557 return c;
2558
2559 return b;
2560 }
2561
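/* Worked example for the coalescing logic above (made-up numbers): assume e->perturb is 7.5s.
 *
 *   - For a window of a=100s, b=130s the per-minute candidate is 120s + 7.5s = 127.5s, which
 *     falls inside [a, b], so we wake at 127.5s; every loop with the same boot ID picks the
 *     same spot within the minute.
 *
 *   - For a window of a=100s, b=105s the per-minute candidate (67.5s) lies before the window,
 *     and so does the 10s candidate (107.5s - 10s = 97.5s), hence we fall back to the 1s grid
 *     and wake at 105.5s - 1s = 104.5s.
 */
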
2562 static int event_arm_timer(
2563 sd_event *e,
2564 struct clock_data *d) {
2565
2566 struct itimerspec its = {};
2567 sd_event_source *a, *b;
2568 usec_t t;
2569 int r;
2570
2571 assert(e);
2572 assert(d);
2573
2574 if (!d->needs_rearm)
2575 return 0;
2576 else
2577 d->needs_rearm = false;
2578
2579 a = prioq_peek(d->earliest);
2580 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2581
2582 if (d->fd < 0)
2583 return 0;
2584
2585 if (d->next == USEC_INFINITY)
2586 return 0;
2587
2588 /* disarm */
2589 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2590 if (r < 0)
2591 return r;
2592
2593 d->next = USEC_INFINITY;
2594 return 0;
2595 }
2596
2597 b = prioq_peek(d->latest);
2598 assert_se(b && b->enabled != SD_EVENT_OFF);
2599
2600 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2601 if (d->next == t)
2602 return 0;
2603
2604 assert_se(d->fd >= 0);
2605
2606 if (t == 0) {
2607 /* We don't want to disarm here, just pick some time long ago. */
2608 its.it_value.tv_sec = 0;
2609 its.it_value.tv_nsec = 1;
2610 } else
2611 timespec_store(&its.it_value, t);
2612
2613 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2614 if (r < 0)
2615 return -errno;
2616
2617 d->next = t;
2618 return 0;
2619 }
2620
2621 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2622 assert(e);
2623 assert(s);
2624 assert(s->type == SOURCE_IO);
2625
2626 /* If the event source was already pending, we just OR in the
2627 * new revents, otherwise we reset the value. The ORing is
2628 * necessary to handle EPOLLONESHOT events properly where
2629 * readability might happen independently of writability, and
2630 * we need to keep track of both */
2631
2632 if (s->pending)
2633 s->io.revents |= revents;
2634 else
2635 s->io.revents = revents;
2636
2637 return source_set_pending(s, true);
2638 }
2639
2640 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2641 uint64_t x;
2642 ssize_t ss;
2643
2644 assert(e);
2645 assert(fd >= 0);
2646
2647 assert_return(events == EPOLLIN, -EIO);
2648
2649 ss = read(fd, &x, sizeof(x));
2650 if (ss < 0) {
2651 if (IN_SET(errno, EAGAIN, EINTR))
2652 return 0;
2653
2654 return -errno;
2655 }
2656
2657 if (_unlikely_(ss != sizeof(x)))
2658 return -EIO;
2659
2660 if (next)
2661 *next = USEC_INFINITY;
2662
2663 return 0;
2664 }
2665
2666 static int process_timer(
2667 sd_event *e,
2668 usec_t n,
2669 struct clock_data *d) {
2670
2671 sd_event_source *s;
2672 int r;
2673
2674 assert(e);
2675 assert(d);
2676
2677 for (;;) {
2678 s = prioq_peek(d->earliest);
2679 if (!s ||
2680 s->time.next > n ||
2681 s->enabled == SD_EVENT_OFF ||
2682 s->pending)
2683 break;
2684
2685 r = source_set_pending(s, true);
2686 if (r < 0)
2687 return r;
2688
2689 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2690 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2691 d->needs_rearm = true;
2692 }
2693
2694 return 0;
2695 }
2696
2697 static int process_child(sd_event *e) {
2698 sd_event_source *s;
2699 Iterator i;
2700 int r;
2701
2702 assert(e);
2703
2704 e->need_process_child = false;
2705
2706 /*
2707 So, this is ugly. We iteratively invoke waitid() with P_PID
2708 + WNOHANG for each PID we wait for, instead of using
2709 P_ALL. This is because we only want to get child
2710 information of very specific child processes, and not all
2711 of them. We might not have processed the SIGCHLD event of a
2712 previous invocation and we don't want to maintain an
2713 unbounded *per-child* event queue, hence we really don't
2714 want anything flushed out of the kernel's queue that we
2715 don't care about. Since this is O(n), this means that if you
2716 have a lot of processes you probably want to handle SIGCHLD
2717 yourself.
2718
2719 We do not reap the children here (by using WNOWAIT); that
2720 is only done after the event source is dispatched so that
2721 the callback still sees the process as a zombie.
2722 */
2723
2724 HASHMAP_FOREACH(s, e->child_sources, i) {
2725 assert(s->type == SOURCE_CHILD);
2726
2727 if (s->pending)
2728 continue;
2729
2730 if (s->enabled == SD_EVENT_OFF)
2731 continue;
2732
2733 zero(s->child.siginfo);
2734 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2735 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2736 if (r < 0)
2737 return -errno;
2738
2739 if (s->child.siginfo.si_pid != 0) {
2740 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2741
2742 if (!zombie && (s->child.options & WEXITED)) {
2743 /* If the child isn't dead then let's
2744 * immediately remove the state change
2745 * from the queue, since there's no
2746 * benefit in leaving it queued */
2747
2748 assert(s->child.options & (WSTOPPED|WCONTINUED));
2749 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2750 }
2751
2752 r = source_set_pending(s, true);
2753 if (r < 0)
2754 return r;
2755 }
2756 }
2757
2758 return 0;
2759 }
2760
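/* Usage sketch (illustrative only): watching a forked child for termination. SIGCHLD has to
 * be blocked in all threads before the source is added, since delivery happens through a
 * signalfd; "e", "s", "pid" and the handler name are assumed application code.
 *
 *         static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *                 // si->si_code/si_status describe how the child terminated; the process is
 *                 // still a zombie at this point and is reaped right after this callback.
 *                 return 0;
 *         }
 *
 *         sigset_t mask;
 *         sigemptyset(&mask);
 *         sigaddset(&mask, SIGCHLD);
 *         sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *         r = sd_event_add_child(e, &s, pid, WEXITED, on_child_exit, NULL);
 *         if (r < 0)
 *                 return r;
 */
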
2761 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2762 bool read_one = false;
2763 int r;
2764
2765 assert(e);
2766 assert(d);
2767 assert_return(events == EPOLLIN, -EIO);
2768
2769 /* If there's a signal queued on this priority and SIGCHLD is
2770 on this priority too, then make sure to recheck the
2771 children we watch. This is because we only ever dequeue
2772 the first signal per priority: if we dequeue one while
2773 SIGCHLD is queued behind it we wouldn't notice, but there
2774 might be higher-priority children we care about, hence we
2775 need to check for them explicitly. */
2776
2777 if (sigismember(&d->sigset, SIGCHLD))
2778 e->need_process_child = true;
2779
2780 /* If there's already an event source pending for this
2781 * priority we don't read another */
2782 if (d->current)
2783 return 0;
2784
2785 for (;;) {
2786 struct signalfd_siginfo si;
2787 ssize_t n;
2788 sd_event_source *s = NULL;
2789
2790 n = read(d->fd, &si, sizeof(si));
2791 if (n < 0) {
2792 if (IN_SET(errno, EAGAIN, EINTR))
2793 return read_one;
2794
2795 return -errno;
2796 }
2797
2798 if (_unlikely_(n != sizeof(si)))
2799 return -EIO;
2800
2801 assert(SIGNAL_VALID(si.ssi_signo));
2802
2803 read_one = true;
2804
2805 if (e->signal_sources)
2806 s = e->signal_sources[si.ssi_signo];
2807 if (!s)
2808 continue;
2809 if (s->pending)
2810 continue;
2811
2812 s->signal.siginfo = si;
2813 d->current = s;
2814
2815 r = source_set_pending(s, true);
2816 if (r < 0)
2817 return r;
2818
2819 return 1;
2820 }
2821 }
2822
2823 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2824 ssize_t n;
2825
2826 assert(e);
2827 assert(d);
2828
2829 assert_return(revents == EPOLLIN, -EIO);
2830
2831 /* If there's already an event source pending for this priority, don't read another */
2832 if (d->n_pending > 0)
2833 return 0;
2834
2835 /* Is the read buffer non-empty? If so, let's not read more */
2836 if (d->buffer_filled > 0)
2837 return 0;
2838
2839 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2840 if (n < 0) {
2841 if (IN_SET(errno, EAGAIN, EINTR))
2842 return 0;
2843
2844 return -errno;
2845 }
2846
2847 assert(n > 0);
2848 d->buffer_filled = (size_t) n;
2849 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2850
2851 return 1;
2852 }
2853
2854 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2855 assert(e);
2856 assert(d);
2857 assert(sz <= d->buffer_filled);
2858
2859 if (sz == 0)
2860 return;
2861
2862 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2863 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2864 d->buffer_filled -= sz;
2865
2866 if (d->buffer_filled == 0)
2867 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2868 }
2869
2870 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2871 int r;
2872
2873 assert(e);
2874 assert(d);
2875
2876 /* If there's already an event source pending for this priority, don't read another */
2877 if (d->n_pending > 0)
2878 return 0;
2879
2880 while (d->buffer_filled > 0) {
2881 size_t sz;
2882
2883 /* Let's validate that the event structures are complete */
2884 if (d->buffer_filled < offsetof(struct inotify_event, name))
2885 return -EIO;
2886
2887 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2888 if (d->buffer_filled < sz)
2889 return -EIO;
2890
2891 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2892 struct inode_data *inode_data;
2893 Iterator i;
2894
2895 /* The queue overran, let's pass this event to all event sources connected to this inotify
2896 * object */
2897
2898 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2899 sd_event_source *s;
2900
2901 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2902
2903 if (s->enabled == SD_EVENT_OFF)
2904 continue;
2905
2906 r = source_set_pending(s, true);
2907 if (r < 0)
2908 return r;
2909 }
2910 }
2911 } else {
2912 struct inode_data *inode_data;
2913 sd_event_source *s;
2914
2915 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2916 * our watch descriptor table. */
2917 if (d->buffer.ev.mask & IN_IGNORED) {
2918
2919 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2920 if (!inode_data) {
2921 event_inotify_data_drop(e, d, sz);
2922 continue;
2923 }
2924
2925 /* The watch descriptor was removed by the kernel, let's drop it here too */
2926 inode_data->wd = -1;
2927 } else {
2928 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2929 if (!inode_data) {
2930 event_inotify_data_drop(e, d, sz);
2931 continue;
2932 }
2933 }
2934
2935 /* Trigger all event sources that are interested in these events. Also trigger all event
2936 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2937 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2938
2939 if (s->enabled == SD_EVENT_OFF)
2940 continue;
2941
2942 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2943 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2944 continue;
2945
2946 r = source_set_pending(s, true);
2947 if (r < 0)
2948 return r;
2949 }
2950 }
2951
2952 /* Something pending now? If so, let's finish, otherwise let's read more. */
2953 if (d->n_pending > 0)
2954 return 1;
2955 }
2956
2957 return 0;
2958 }
2959
2960 static int process_inotify(sd_event *e) {
2961 struct inotify_data *d;
2962 int r, done = 0;
2963
2964 assert(e);
2965
2966 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2967 r = event_inotify_data_process(e, d);
2968 if (r < 0)
2969 return r;
2970 if (r > 0)
2971 done++;
2972 }
2973
2974 return done;
2975 }
2976
2977 static int source_dispatch(sd_event_source *s) {
2978 EventSourceType saved_type;
2979 int r = 0;
2980
2981 assert(s);
2982 assert(s->pending || s->type == SOURCE_EXIT);
2983
2984 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2985 * the event. */
2986 saved_type = s->type;
2987
2988 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2989 r = source_set_pending(s, false);
2990 if (r < 0)
2991 return r;
2992 }
2993
2994 if (s->type != SOURCE_POST) {
2995 sd_event_source *z;
2996 Iterator i;
2997
2998 /* If we execute a non-post source, let's mark all
2999 * post sources as pending */
3000
3001 SET_FOREACH(z, s->event->post_sources, i) {
3002 if (z->enabled == SD_EVENT_OFF)
3003 continue;
3004
3005 r = source_set_pending(z, true);
3006 if (r < 0)
3007 return r;
3008 }
3009 }
3010
3011 if (s->enabled == SD_EVENT_ONESHOT) {
3012 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3013 if (r < 0)
3014 return r;
3015 }
3016
3017 s->dispatching = true;
3018
3019 switch (s->type) {
3020
3021 case SOURCE_IO:
3022 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3023 break;
3024
3025 case SOURCE_TIME_REALTIME:
3026 case SOURCE_TIME_BOOTTIME:
3027 case SOURCE_TIME_MONOTONIC:
3028 case SOURCE_TIME_REALTIME_ALARM:
3029 case SOURCE_TIME_BOOTTIME_ALARM:
3030 r = s->time.callback(s, s->time.next, s->userdata);
3031 break;
3032
3033 case SOURCE_SIGNAL:
3034 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3035 break;
3036
3037 case SOURCE_CHILD: {
3038 bool zombie;
3039
3040 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3041
3042 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3043
3044 /* Now, reap the PID for good. */
3045 if (zombie)
3046 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3047
3048 break;
3049 }
3050
3051 case SOURCE_DEFER:
3052 r = s->defer.callback(s, s->userdata);
3053 break;
3054
3055 case SOURCE_POST:
3056 r = s->post.callback(s, s->userdata);
3057 break;
3058
3059 case SOURCE_EXIT:
3060 r = s->exit.callback(s, s->userdata);
3061 break;
3062
3063 case SOURCE_INOTIFY: {
3064 struct sd_event *e = s->event;
3065 struct inotify_data *d;
3066 size_t sz;
3067
3068 assert(s->inotify.inode_data);
3069 assert_se(d = s->inotify.inode_data->inotify_data);
3070
3071 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3072 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3073 assert(d->buffer_filled >= sz);
3074
3075 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3076
3077 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3078 * buffer. */
3079 if (d->n_pending == 0)
3080 event_inotify_data_drop(e, d, sz);
3081
3082 break;
3083 }
3084
3085 case SOURCE_WATCHDOG:
3086 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3087 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3088 assert_not_reached("Wut? I shouldn't exist.");
3089 }
3090
3091 s->dispatching = false;
3092
3093 if (r < 0)
3094 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3095 strna(s->description), event_source_type_to_string(saved_type));
3096
3097 if (s->n_ref == 0)
3098 source_free(s);
3099 else if (r < 0)
3100 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3101
3102 return 1;
3103 }
3104
3105 static int event_prepare(sd_event *e) {
3106 int r;
3107
3108 assert(e);
3109
3110 for (;;) {
3111 sd_event_source *s;
3112
3113 s = prioq_peek(e->prepare);
3114 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3115 break;
3116
3117 s->prepare_iteration = e->iteration;
3118 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3119 if (r < 0)
3120 return r;
3121
3122 assert(s->prepare);
3123
3124 s->dispatching = true;
3125 r = s->prepare(s, s->userdata);
3126 s->dispatching = false;
3127
3128 if (r < 0)
3129 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3130 strna(s->description), event_source_type_to_string(s->type));
3131
3132 if (s->n_ref == 0)
3133 source_free(s);
3134 else if (r < 0)
3135 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3136 }
3137
3138 return 0;
3139 }
3140
3141 static int dispatch_exit(sd_event *e) {
3142 sd_event_source *p;
3143 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3144 int r;
3145
3146 assert(e);
3147
3148 p = prioq_peek(e->exit);
3149 if (!p || p->enabled == SD_EVENT_OFF) {
3150 e->state = SD_EVENT_FINISHED;
3151 return 0;
3152 }
3153
3154 ref = sd_event_ref(e);
3155 e->iteration++;
3156 e->state = SD_EVENT_EXITING;
3157 r = source_dispatch(p);
3158 e->state = SD_EVENT_INITIAL;
3159 return r;
3160 }
3161
3162 static sd_event_source* event_next_pending(sd_event *e) {
3163 sd_event_source *p;
3164
3165 assert(e);
3166
3167 p = prioq_peek(e->pending);
3168 if (!p)
3169 return NULL;
3170
3171 if (p->enabled == SD_EVENT_OFF)
3172 return NULL;
3173
3174 return p;
3175 }
3176
3177 static int arm_watchdog(sd_event *e) {
3178 struct itimerspec its = {};
3179 usec_t t;
3180 int r;
3181
3182 assert(e);
3183 assert(e->watchdog_fd >= 0);
3184
3185 t = sleep_between(e,
3186 e->watchdog_last + (e->watchdog_period / 2),
3187 e->watchdog_last + (e->watchdog_period * 3 / 4));
3188
3189 timespec_store(&its.it_value, t);
3190
3191 /* Make sure we never set the watchdog to 0, which tells the
3192 * kernel to disable it. */
3193 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3194 its.it_value.tv_nsec = 1;
3195
3196 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3197 if (r < 0)
3198 return -errno;
3199
3200 return 0;
3201 }
3202
3203 static int process_watchdog(sd_event *e) {
3204 assert(e);
3205
3206 if (!e->watchdog)
3207 return 0;
3208
3209 /* Don't notify watchdog too often */
3210 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3211 return 0;
3212
3213 sd_notify(false, "WATCHDOG=1");
3214 e->watchdog_last = e->timestamp.monotonic;
3215
3216 return arm_watchdog(e);
3217 }
3218
3219 static void event_close_inode_data_fds(sd_event *e) {
3220 struct inode_data *d;
3221
3222 assert(e);
3223
3224 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3225 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3226 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3227 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3228 * compromise. */
3229
3230 while ((d = e->inode_data_to_close)) {
3231 assert(d->fd >= 0);
3232 d->fd = safe_close(d->fd);
3233
3234 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3235 }
3236 }
3237
3238 _public_ int sd_event_prepare(sd_event *e) {
3239 int r;
3240
3241 assert_return(e, -EINVAL);
3242 assert_return(e = event_resolve(e), -ENOPKG);
3243 assert_return(!event_pid_changed(e), -ECHILD);
3244 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3245 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3246
3247 if (e->exit_requested)
3248 goto pending;
3249
3250 e->iteration++;
3251
3252 e->state = SD_EVENT_PREPARING;
3253 r = event_prepare(e);
3254 e->state = SD_EVENT_INITIAL;
3255 if (r < 0)
3256 return r;
3257
3258 r = event_arm_timer(e, &e->realtime);
3259 if (r < 0)
3260 return r;
3261
3262 r = event_arm_timer(e, &e->boottime);
3263 if (r < 0)
3264 return r;
3265
3266 r = event_arm_timer(e, &e->monotonic);
3267 if (r < 0)
3268 return r;
3269
3270 r = event_arm_timer(e, &e->realtime_alarm);
3271 if (r < 0)
3272 return r;
3273
3274 r = event_arm_timer(e, &e->boottime_alarm);
3275 if (r < 0)
3276 return r;
3277
3278 event_close_inode_data_fds(e);
3279
3280 if (event_next_pending(e) || e->need_process_child)
3281 goto pending;
3282
3283 e->state = SD_EVENT_ARMED;
3284
3285 return 0;
3286
3287 pending:
3288 e->state = SD_EVENT_ARMED;
3289 r = sd_event_wait(e, 0);
3290 if (r == 0)
3291 e->state = SD_EVENT_ARMED;
3292
3293 return r;
3294 }
3295
3296 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3297 struct epoll_event *ev_queue;
3298 unsigned ev_queue_max;
3299 int r, m, i;
3300
3301 assert_return(e, -EINVAL);
3302 assert_return(e = event_resolve(e), -ENOPKG);
3303 assert_return(!event_pid_changed(e), -ECHILD);
3304 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3305 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3306
3307 if (e->exit_requested) {
3308 e->state = SD_EVENT_PENDING;
3309 return 1;
3310 }
3311
3312 ev_queue_max = MAX(e->n_sources, 1u);
3313 ev_queue = newa(struct epoll_event, ev_queue_max);
3314
3315 /* If we still have inotify data buffered, then query the other fds, but don't block waiting for them */
3316 if (e->inotify_data_buffered)
3317 timeout = 0;
3318
3319 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3320 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3321 if (m < 0) {
3322 if (errno == EINTR) {
3323 e->state = SD_EVENT_PENDING;
3324 return 1;
3325 }
3326
3327 r = -errno;
3328 goto finish;
3329 }
3330
3331 triple_timestamp_get(&e->timestamp);
3332
3333 for (i = 0; i < m; i++) {
3334
3335 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3336 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3337 else {
3338 WakeupType *t = ev_queue[i].data.ptr;
3339
3340 switch (*t) {
3341
3342 case WAKEUP_EVENT_SOURCE:
3343 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3344 break;
3345
3346 case WAKEUP_CLOCK_DATA: {
3347 struct clock_data *d = ev_queue[i].data.ptr;
3348 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3349 break;
3350 }
3351
3352 case WAKEUP_SIGNAL_DATA:
3353 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3354 break;
3355
3356 case WAKEUP_INOTIFY_DATA:
3357 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3358 break;
3359
3360 default:
3361 assert_not_reached("Invalid wake-up pointer");
3362 }
3363 }
3364 if (r < 0)
3365 goto finish;
3366 }
3367
3368 r = process_watchdog(e);
3369 if (r < 0)
3370 goto finish;
3371
3372 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3373 if (r < 0)
3374 goto finish;
3375
3376 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3377 if (r < 0)
3378 goto finish;
3379
3380 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3381 if (r < 0)
3382 goto finish;
3383
3384 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3385 if (r < 0)
3386 goto finish;
3387
3388 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3389 if (r < 0)
3390 goto finish;
3391
3392 if (e->need_process_child) {
3393 r = process_child(e);
3394 if (r < 0)
3395 goto finish;
3396 }
3397
3398 r = process_inotify(e);
3399 if (r < 0)
3400 goto finish;
3401
3402 if (event_next_pending(e)) {
3403 e->state = SD_EVENT_PENDING;
3404
3405 return 1;
3406 }
3407
3408 r = 0;
3409
3410 finish:
3411 e->state = SD_EVENT_INITIAL;
3412
3413 return r;
3414 }
3415
3416 _public_ int sd_event_dispatch(sd_event *e) {
3417 sd_event_source *p;
3418 int r;
3419
3420 assert_return(e, -EINVAL);
3421 assert_return(e = event_resolve(e), -ENOPKG);
3422 assert_return(!event_pid_changed(e), -ECHILD);
3423 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3424 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3425
3426 if (e->exit_requested)
3427 return dispatch_exit(e);
3428
3429 p = event_next_pending(e);
3430 if (p) {
3431 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3432
3433 ref = sd_event_ref(e);
3434 e->state = SD_EVENT_RUNNING;
3435 r = source_dispatch(p);
3436 e->state = SD_EVENT_INITIAL;
3437 return r;
3438 }
3439
3440 e->state = SD_EVENT_INITIAL;
3441
3442 return 1;
3443 }
3444
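/* Usage sketch (illustrative only): driving one loop iteration manually instead of via
 * sd_event_run(), e.g. when the fd returned by sd_event_get_fd() is embedded into a foreign
 * main loop. "e" is assumed to be a fully set up sd_event object.
 *
 *         r = sd_event_prepare(e);
 *         if (r < 0)
 *                 return r;
 *         if (r == 0) {
 *                 // Nothing pending yet: block until an event arrives
 *                 r = sd_event_wait(e, (uint64_t) -1);
 *                 if (r < 0)
 *                         return r;
 *         }
 *         if (r > 0) {
 *                 // Something is pending: run exactly one event source callback
 *                 r = sd_event_dispatch(e);
 *                 if (r < 0)
 *                         return r;
 *         }
 */
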
3445 static void event_log_delays(sd_event *e) {
3446 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3447 unsigned i;
3448 int o;
3449
3450 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3451 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3452 e->delays[i] = 0;
3453 }
3454 log_debug("Event loop iterations: %.*s", o, b);
3455 }
3456
3457 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3458 int r;
3459
3460 assert_return(e, -EINVAL);
3461 assert_return(e = event_resolve(e), -ENOPKG);
3462 assert_return(!event_pid_changed(e), -ECHILD);
3463 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3464 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3465
3466 if (e->profile_delays && e->last_run) {
3467 usec_t this_run;
3468 unsigned l;
3469
3470 this_run = now(CLOCK_MONOTONIC);
3471
3472 l = u64log2(this_run - e->last_run);
3473 assert(l < sizeof(e->delays));
3474 e->delays[l]++;
3475
3476 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3477 event_log_delays(e);
3478 e->last_log = this_run;
3479 }
3480 }
3481
3482 r = sd_event_prepare(e);
3483 if (r == 0)
3484 /* There was nothing? Then wait... */
3485 r = sd_event_wait(e, timeout);
3486
3487 if (e->profile_delays)
3488 e->last_run = now(CLOCK_MONOTONIC);
3489
3490 if (r > 0) {
3491 /* There's something now, so let's dispatch it */
3492 r = sd_event_dispatch(e);
3493 if (r < 0)
3494 return r;
3495
3496 return 1;
3497 }
3498
3499 return r;
3500 }
3501
3502 _public_ int sd_event_loop(sd_event *e) {
3503 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3504 int r;
3505
3506 assert_return(e, -EINVAL);
3507 assert_return(e = event_resolve(e), -ENOPKG);
3508 assert_return(!event_pid_changed(e), -ECHILD);
3509 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3510
3511 ref = sd_event_ref(e);
3512
3513 while (e->state != SD_EVENT_FINISHED) {
3514 r = sd_event_run(e, (uint64_t) -1);
3515 if (r < 0)
3516 return r;
3517 }
3518
3519 return e->exit_code;
3520 }
3521
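/* Usage sketch (illustrative only): the common "set up and run" pattern built on top of
 * sd_event_loop(). SIGTERM is blocked first so that it can be delivered through a signal
 * source; the handler requests a clean exit, which makes sd_event_loop() return the exit code.
 *
 *         static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *                 return sd_event_exit(sd_event_source_get_event(s), 0);
 *         }
 *
 *         sd_event *e = NULL;
 *         sigset_t mask;
 *         int r;
 *
 *         sigemptyset(&mask);
 *         sigaddset(&mask, SIGTERM);
 *         sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *         r = sd_event_default(&e);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *         if (r < 0)
 *                 return r;
 *
 *         r = sd_event_loop(e);
 *         sd_event_unref(e);
 *         return r;
 */
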
3522 _public_ int sd_event_get_fd(sd_event *e) {
3523
3524 assert_return(e, -EINVAL);
3525 assert_return(e = event_resolve(e), -ENOPKG);
3526 assert_return(!event_pid_changed(e), -ECHILD);
3527
3528 return e->epoll_fd;
3529 }
3530
3531 _public_ int sd_event_get_state(sd_event *e) {
3532 assert_return(e, -EINVAL);
3533 assert_return(e = event_resolve(e), -ENOPKG);
3534 assert_return(!event_pid_changed(e), -ECHILD);
3535
3536 return e->state;
3537 }
3538
3539 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3540 assert_return(e, -EINVAL);
3541 assert_return(e = event_resolve(e), -ENOPKG);
3542 assert_return(code, -EINVAL);
3543 assert_return(!event_pid_changed(e), -ECHILD);
3544
3545 if (!e->exit_requested)
3546 return -ENODATA;
3547
3548 *code = e->exit_code;
3549 return 0;
3550 }
3551
3552 _public_ int sd_event_exit(sd_event *e, int code) {
3553 assert_return(e, -EINVAL);
3554 assert_return(e = event_resolve(e), -ENOPKG);
3555 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3556 assert_return(!event_pid_changed(e), -ECHILD);
3557
3558 e->exit_requested = true;
3559 e->exit_code = code;
3560
3561 return 0;
3562 }
3563
3564 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3565 assert_return(e, -EINVAL);
3566 assert_return(e = event_resolve(e), -ENOPKG);
3567 assert_return(usec, -EINVAL);
3568 assert_return(!event_pid_changed(e), -ECHILD);
3569
3570 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3571 return -EOPNOTSUPP;
3572
3573 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3574 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3575 * the purpose of getting the time this doesn't matter. */
3576 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3577 return -EOPNOTSUPP;
3578
3579 if (!triple_timestamp_is_set(&e->timestamp)) {
3580 /* Implicitly fall back to now() if we never ran
3581 * before and thus have no cached time. */
3582 *usec = now(clock);
3583 return 1;
3584 }
3585
3586 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3587 return 0;
3588 }
3589
3590 _public_ int sd_event_default(sd_event **ret) {
3591 sd_event *e = NULL;
3592 int r;
3593
3594 if (!ret)
3595 return !!default_event;
3596
3597 if (default_event) {
3598 *ret = sd_event_ref(default_event);
3599 return 0;
3600 }
3601
3602 r = sd_event_new(&e);
3603 if (r < 0)
3604 return r;
3605
3606 e->default_event_ptr = &default_event;
3607 e->tid = gettid();
3608 default_event = e;
3609
3610 *ret = e;
3611 return 1;
3612 }
3613
3614 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3615 assert_return(e, -EINVAL);
3616 assert_return(e = event_resolve(e), -ENOPKG);
3617 assert_return(tid, -EINVAL);
3618 assert_return(!event_pid_changed(e), -ECHILD);
3619
3620 if (e->tid != 0) {
3621 *tid = e->tid;
3622 return 0;
3623 }
3624
3625 return -ENXIO;
3626 }
3627
3628 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3629 int r;
3630
3631 assert_return(e, -EINVAL);
3632 assert_return(e = event_resolve(e), -ENOPKG);
3633 assert_return(!event_pid_changed(e), -ECHILD);
3634
3635 if (e->watchdog == !!b)
3636 return e->watchdog;
3637
3638 if (b) {
3639 struct epoll_event ev;
3640
3641 r = sd_watchdog_enabled(false, &e->watchdog_period);
3642 if (r <= 0)
3643 return r;
3644
3645 /* Issue first ping immediately */
3646 sd_notify(false, "WATCHDOG=1");
3647 e->watchdog_last = now(CLOCK_MONOTONIC);
3648
3649 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3650 if (e->watchdog_fd < 0)
3651 return -errno;
3652
3653 r = arm_watchdog(e);
3654 if (r < 0)
3655 goto fail;
3656
3657 ev = (struct epoll_event) {
3658 .events = EPOLLIN,
3659 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3660 };
3661
3662 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3663 if (r < 0) {
3664 r = -errno;
3665 goto fail;
3666 }
3667
3668 } else {
3669 if (e->watchdog_fd >= 0) {
3670 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3671 e->watchdog_fd = safe_close(e->watchdog_fd);
3672 }
3673 }
3674
3675 e->watchdog = !!b;
3676 return e->watchdog;
3677
3678 fail:
3679 e->watchdog_fd = safe_close(e->watchdog_fd);
3680 return r;
3681 }
3682
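/* Usage sketch (illustrative only): letting the event loop service the systemd watchdog. If
 * the unit sets WatchdogSec=, sd_watchdog_enabled() picks up the period from $WATCHDOG_USEC
 * and the loop then pings the manager from the timerfd armed above.
 *
 *         r = sd_event_set_watchdog(e, true);
 *         if (r < 0)
 *                 return r;
 *         if (r == 0)
 *                 log_debug("Watchdog logic not requested, skipping.");
 */
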
3683 _public_ int sd_event_get_watchdog(sd_event *e) {
3684 assert_return(e, -EINVAL);
3685 assert_return(e = event_resolve(e), -ENOPKG);
3686 assert_return(!event_pid_changed(e), -ECHILD);
3687
3688 return e->watchdog;
3689 }
3690
3691 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3692 assert_return(e, -EINVAL);
3693 assert_return(e = event_resolve(e), -ENOPKG);
3694 assert_return(!event_pid_changed(e), -ECHILD);
3695
3696 *ret = e->iteration;
3697 return 0;
3698 }
3699
3700 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3701 assert_return(s, -EINVAL);
3702
3703 s->destroy_callback = callback;
3704 return 0;
3705 }
3706
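/* Usage sketch (illustrative only): tying the lifetime of heap-allocated userdata to the
 * event source, so it is freed automatically when the source is destroyed. "Context",
 * "on_defer", "e" and "s" are hypothetical application code.
 *
 *         Context *c = new0(Context, 1);
 *         if (!c)
 *                 return -ENOMEM;
 *
 *         r = sd_event_add_defer(e, &s, on_defer, c);
 *         if (r < 0) {
 *                 free(c);
 *                 return r;
 *         }
 *
 *         r = sd_event_source_set_destroy_callback(s, free);
 *         if (r < 0)
 *                 return r;
 */
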
3707 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3708 assert_return(s, -EINVAL);
3709
3710 if (ret)
3711 *ret = s->destroy_callback;
3712
3713 return !!s->destroy_callback;
3714 }