1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 ***/
4
5 #include <sys/epoll.h>
6 #include <sys/timerfd.h>
7 #include <sys/wait.h>
8
9 #include "sd-daemon.h"
10 #include "sd-event.h"
11 #include "sd-id128.h"
12
13 #include "alloc-util.h"
14 #include "fd-util.h"
15 #include "fs-util.h"
16 #include "hashmap.h"
17 #include "list.h"
18 #include "macro.h"
19 #include "missing.h"
20 #include "prioq.h"
21 #include "process-util.h"
22 #include "set.h"
23 #include "signal-util.h"
24 #include "string-table.h"
25 #include "string-util.h"
26 #include "time-util.h"
27 #include "util.h"
28
29 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
30
31 typedef enum EventSourceType {
32 SOURCE_IO,
33 SOURCE_TIME_REALTIME,
34 SOURCE_TIME_BOOTTIME,
35 SOURCE_TIME_MONOTONIC,
36 SOURCE_TIME_REALTIME_ALARM,
37 SOURCE_TIME_BOOTTIME_ALARM,
38 SOURCE_SIGNAL,
39 SOURCE_CHILD,
40 SOURCE_DEFER,
41 SOURCE_POST,
42 SOURCE_EXIT,
43 SOURCE_WATCHDOG,
44 SOURCE_INOTIFY,
45 _SOURCE_EVENT_SOURCE_TYPE_MAX,
46 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
47 } EventSourceType;
48
49 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
50 [SOURCE_IO] = "io",
51 [SOURCE_TIME_REALTIME] = "realtime",
52         [SOURCE_TIME_BOOTTIME] = "boottime",
53 [SOURCE_TIME_MONOTONIC] = "monotonic",
54 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
55 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
56 [SOURCE_SIGNAL] = "signal",
57 [SOURCE_CHILD] = "child",
58 [SOURCE_DEFER] = "defer",
59 [SOURCE_POST] = "post",
60 [SOURCE_EXIT] = "exit",
61 [SOURCE_WATCHDOG] = "watchdog",
62 [SOURCE_INOTIFY] = "inotify",
63 };
64
65 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
66
67 /* All objects we use in epoll events start with a field of this type, so that
68  * we know how to dispatch the event when it fires */
69 typedef enum WakeupType {
70 WAKEUP_NONE,
71 WAKEUP_EVENT_SOURCE,
72 WAKEUP_CLOCK_DATA,
73 WAKEUP_SIGNAL_DATA,
74 WAKEUP_INOTIFY_DATA,
75 _WAKEUP_TYPE_MAX,
76 _WAKEUP_TYPE_INVALID = -1,
77 } WakeupType;
78
79 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
80
81 struct inode_data;
82
83 struct sd_event_source {
84 WakeupType wakeup;
85
86 unsigned n_ref;
87
88 sd_event *event;
89 void *userdata;
90 sd_event_handler_t prepare;
91
92 char *description;
93
94 EventSourceType type:5;
95 signed int enabled:3;
96 bool pending:1;
97 bool dispatching:1;
98 bool floating:1;
99
100 int64_t priority;
101 unsigned pending_index;
102 unsigned prepare_index;
103 uint64_t pending_iteration;
104 uint64_t prepare_iteration;
105
106 sd_event_destroy_t destroy_callback;
107
108 LIST_FIELDS(sd_event_source, sources);
109
110 union {
111 struct {
112 sd_event_io_handler_t callback;
113 int fd;
114 uint32_t events;
115 uint32_t revents;
116 bool registered:1;
117 bool owned:1;
118 } io;
119 struct {
120 sd_event_time_handler_t callback;
121 usec_t next, accuracy;
122 unsigned earliest_index;
123 unsigned latest_index;
124 } time;
125 struct {
126 sd_event_signal_handler_t callback;
127 struct signalfd_siginfo siginfo;
128 int sig;
129 } signal;
130 struct {
131 sd_event_child_handler_t callback;
132 siginfo_t siginfo;
133 pid_t pid;
134 int options;
135 } child;
136 struct {
137 sd_event_handler_t callback;
138 } defer;
139 struct {
140 sd_event_handler_t callback;
141 } post;
142 struct {
143 sd_event_handler_t callback;
144 unsigned prioq_index;
145 } exit;
146 struct {
147 sd_event_inotify_handler_t callback;
148 uint32_t mask;
149 struct inode_data *inode_data;
150 LIST_FIELDS(sd_event_source, by_inode_data);
151 } inotify;
152 };
153 };
154
155 struct clock_data {
156 WakeupType wakeup;
157 int fd;
158
159 /* For all clocks we maintain two priority queues each, one
160          * ordered by the earliest times the events may be
161 * dispatched, and one ordered by the latest times they must
162 * have been dispatched. The range between the top entries in
163 * the two prioqs is the time window we can freely schedule
164 * wakeups in */
165
166 Prioq *earliest;
167 Prioq *latest;
168 usec_t next;
169
170 bool needs_rearm:1;
171 };
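/* Illustration (a sketch, not code from this file): the earliest prioq is keyed by s->time.next and the
 * latest prioq by s->time.next + s->time.accuracy (see time_event_source_latest() below), so for a given
 * clock any wakeup time t with
 *
 *     ((sd_event_source*) prioq_peek(d->earliest))->time.next <= t &&
 *     t <= time_event_source_latest(prioq_peek(d->latest))
 *
 * satisfies the most urgent timer while leaving room to coalesce with other wakeups. */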
172
173 struct signal_data {
174 WakeupType wakeup;
175
176 /* For each priority we maintain one signal fd, so that we
177 * only have to dequeue a single event per priority at a
178 * time. */
179
180 int fd;
181 int64_t priority;
182 sigset_t sigset;
183 sd_event_source *current;
184 };
185
186 /* A structure listing all event sources currently watching a specific inode */
187 struct inode_data {
188 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
189 ino_t ino;
190 dev_t dev;
191
192         /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that the
193          * priority can still be changed until then: changing the priority means adding a watch descriptor to the
194          * inotify fd of the new priority, which is only possible while we still have a handle to the original
195          * inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of the
196          * sd-event object, so that it is efficient to close all of them before entering the next event loop
197          * iteration. */
198 int fd;
199
200 /* The inotify "watch descriptor" */
201 int wd;
202
203 /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
204 * most recently been set on the watch descriptor. */
205 uint32_t combined_mask;
206
207 /* All event sources subscribed to this inode */
208 LIST_HEAD(sd_event_source, event_sources);
209
210 /* The inotify object we watch this inode with */
211 struct inotify_data *inotify_data;
212
213 /* A linked list of all inode data objects with fds to close (see above) */
214 LIST_FIELDS(struct inode_data, to_close);
215 };
216
217 /* A structure encapsulating an inotify fd */
218 struct inotify_data {
219 WakeupType wakeup;
220
221 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
222 * a time */
223
224 int fd;
225 int64_t priority;
226
227 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
228 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
229
230 /* The buffer we read inotify events into */
231 union inotify_event_buffer buffer;
232 size_t buffer_filled; /* fill level of the buffer */
233
234 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
235          * inotify fd as long as there are still pending events on the inotify (because we have no strategy for queuing
236 * the events locally if they can't be coalesced). */
237 unsigned n_pending;
238
239 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
240 * to make it efficient to figure out what inotify objects to process data on next. */
241 LIST_FIELDS(struct inotify_data, buffered);
242 };
243
244 struct sd_event {
245 unsigned n_ref;
246
247 int epoll_fd;
248 int watchdog_fd;
249
250 Prioq *pending;
251 Prioq *prepare;
252
253 /* timerfd_create() only supports these five clocks so far. We
254 * can add support for more clocks when the kernel learns to
255 * deal with them, too. */
256 struct clock_data realtime;
257 struct clock_data boottime;
258 struct clock_data monotonic;
259 struct clock_data realtime_alarm;
260 struct clock_data boottime_alarm;
261
262 usec_t perturb;
263
264 sd_event_source **signal_sources; /* indexed by signal number */
265 Hashmap *signal_data; /* indexed by priority */
266
267 Hashmap *child_sources;
268 unsigned n_enabled_child_sources;
269
270 Set *post_sources;
271
272 Prioq *exit;
273
274 Hashmap *inotify_data; /* indexed by priority */
275
276 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
277 LIST_HEAD(struct inode_data, inode_data_to_close);
278
279 /* A list of inotify objects that already have events buffered which aren't processed yet */
280 LIST_HEAD(struct inotify_data, inotify_data_buffered);
281
282 pid_t original_pid;
283
284 uint64_t iteration;
285 triple_timestamp timestamp;
286 int state;
287
288 bool exit_requested:1;
289 bool need_process_child:1;
290 bool watchdog:1;
291 bool profile_delays:1;
292
293 int exit_code;
294
295 pid_t tid;
296 sd_event **default_event_ptr;
297
298 usec_t watchdog_last, watchdog_period;
299
300 unsigned n_sources;
301
302 LIST_HEAD(sd_event_source, sources);
303
304 usec_t last_run, last_log;
305 unsigned delays[sizeof(usec_t) * 8];
306 };
307
308 static thread_local sd_event *default_event = NULL;
309
310 static void source_disconnect(sd_event_source *s);
311 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
312
313 static sd_event *event_resolve(sd_event *e) {
314 return e == SD_EVENT_DEFAULT ? default_event : e;
315 }
316
317 static int pending_prioq_compare(const void *a, const void *b) {
318 const sd_event_source *x = a, *y = b;
319
320 assert(x->pending);
321 assert(y->pending);
322
323 /* Enabled ones first */
324 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
325 return -1;
326 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
327 return 1;
328
329 /* Lower priority values first */
330 if (x->priority < y->priority)
331 return -1;
332 if (x->priority > y->priority)
333 return 1;
334
335 /* Older entries first */
336 if (x->pending_iteration < y->pending_iteration)
337 return -1;
338 if (x->pending_iteration > y->pending_iteration)
339 return 1;
340
341 return 0;
342 }
343
344 static int prepare_prioq_compare(const void *a, const void *b) {
345 const sd_event_source *x = a, *y = b;
346
347 assert(x->prepare);
348 assert(y->prepare);
349
350 /* Enabled ones first */
351 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
352 return -1;
353 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
354 return 1;
355
356 /* Move most recently prepared ones last, so that we can stop
357 * preparing as soon as we hit one that has already been
358 * prepared in the current iteration */
359 if (x->prepare_iteration < y->prepare_iteration)
360 return -1;
361 if (x->prepare_iteration > y->prepare_iteration)
362 return 1;
363
364 /* Lower priority values first */
365 if (x->priority < y->priority)
366 return -1;
367 if (x->priority > y->priority)
368 return 1;
369
370 return 0;
371 }
372
373 static int earliest_time_prioq_compare(const void *a, const void *b) {
374 const sd_event_source *x = a, *y = b;
375
376 assert(EVENT_SOURCE_IS_TIME(x->type));
377 assert(x->type == y->type);
378
379 /* Enabled ones first */
380 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
381 return -1;
382 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
383 return 1;
384
385 /* Move the pending ones to the end */
386 if (!x->pending && y->pending)
387 return -1;
388 if (x->pending && !y->pending)
389 return 1;
390
391 /* Order by time */
392 if (x->time.next < y->time.next)
393 return -1;
394 if (x->time.next > y->time.next)
395 return 1;
396
397 return 0;
398 }
399
400 static usec_t time_event_source_latest(const sd_event_source *s) {
401 return usec_add(s->time.next, s->time.accuracy);
402 }
403
404 static int latest_time_prioq_compare(const void *a, const void *b) {
405 const sd_event_source *x = a, *y = b;
406
407 assert(EVENT_SOURCE_IS_TIME(x->type));
408 assert(x->type == y->type);
409
410 /* Enabled ones first */
411 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
412 return -1;
413 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
414 return 1;
415
416 /* Move the pending ones to the end */
417 if (!x->pending && y->pending)
418 return -1;
419 if (x->pending && !y->pending)
420 return 1;
421
422 /* Order by time */
423 if (time_event_source_latest(x) < time_event_source_latest(y))
424 return -1;
425 if (time_event_source_latest(x) > time_event_source_latest(y))
426 return 1;
427
428 return 0;
429 }
430
431 static int exit_prioq_compare(const void *a, const void *b) {
432 const sd_event_source *x = a, *y = b;
433
434 assert(x->type == SOURCE_EXIT);
435 assert(y->type == SOURCE_EXIT);
436
437 /* Enabled ones first */
438 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
439 return -1;
440 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
441 return 1;
442
443 /* Lower priority values first */
444 if (x->priority < y->priority)
445 return -1;
446 if (x->priority > y->priority)
447 return 1;
448
449 return 0;
450 }
451
452 static void free_clock_data(struct clock_data *d) {
453 assert(d);
454 assert(d->wakeup == WAKEUP_CLOCK_DATA);
455
456 safe_close(d->fd);
457 prioq_free(d->earliest);
458 prioq_free(d->latest);
459 }
460
461 static void event_free(sd_event *e) {
462 sd_event_source *s;
463
464 assert(e);
465
466 while ((s = e->sources)) {
467 assert(s->floating);
468 source_disconnect(s);
469 sd_event_source_unref(s);
470 }
471
472 assert(e->n_sources == 0);
473
474 if (e->default_event_ptr)
475 *(e->default_event_ptr) = NULL;
476
477 safe_close(e->epoll_fd);
478 safe_close(e->watchdog_fd);
479
480 free_clock_data(&e->realtime);
481 free_clock_data(&e->boottime);
482 free_clock_data(&e->monotonic);
483 free_clock_data(&e->realtime_alarm);
484 free_clock_data(&e->boottime_alarm);
485
486 prioq_free(e->pending);
487 prioq_free(e->prepare);
488 prioq_free(e->exit);
489
490 free(e->signal_sources);
491 hashmap_free(e->signal_data);
492
493 hashmap_free(e->inotify_data);
494
495 hashmap_free(e->child_sources);
496 set_free(e->post_sources);
497 free(e);
498 }
499
500 _public_ int sd_event_new(sd_event** ret) {
501 sd_event *e;
502 int r;
503
504 assert_return(ret, -EINVAL);
505
506 e = new(sd_event, 1);
507 if (!e)
508 return -ENOMEM;
509
510 *e = (sd_event) {
511 .n_ref = 1,
512 .epoll_fd = -1,
513 .watchdog_fd = -1,
514 .realtime.wakeup = WAKEUP_CLOCK_DATA,
515 .realtime.fd = -1,
516 .realtime.next = USEC_INFINITY,
517 .boottime.wakeup = WAKEUP_CLOCK_DATA,
518 .boottime.fd = -1,
519 .boottime.next = USEC_INFINITY,
520 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
521 .monotonic.fd = -1,
522 .monotonic.next = USEC_INFINITY,
523 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
524 .realtime_alarm.fd = -1,
525 .realtime_alarm.next = USEC_INFINITY,
526 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
527 .boottime_alarm.fd = -1,
528 .boottime_alarm.next = USEC_INFINITY,
529 .perturb = USEC_INFINITY,
530 .original_pid = getpid_cached(),
531 };
532
533 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
534 if (r < 0)
535 goto fail;
536
537 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
538 if (e->epoll_fd < 0) {
539 r = -errno;
540 goto fail;
541 }
542
543 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
544
545 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
546 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
547 e->profile_delays = true;
548 }
549
550 *ret = e;
551 return 0;
552
553 fail:
554 event_free(e);
555 return r;
556 }
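/* A minimal usage sketch (assumes the public declarations from sd-event.h; sd_event_loop() is
 * defined further down in this file, outside this excerpt):
 *
 *     #include <systemd/sd-event.h>
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);
 *             if (r < 0)
 *                     return 1;
 *
 *             // attach sources here, e.g. sd_event_add_io(), sd_event_add_time(), ...
 *
 *             r = sd_event_loop(e);      // runs until sd_event_exit() is called
 *             sd_event_unref(e);
 *             return r < 0;
 *     }
 */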
557
558 _public_ sd_event* sd_event_ref(sd_event *e) {
559
560 if (!e)
561 return NULL;
562
563 assert(e->n_ref >= 1);
564 e->n_ref++;
565
566 return e;
567 }
568
569 _public_ sd_event* sd_event_unref(sd_event *e) {
570
571 if (!e)
572 return NULL;
573
574 assert(e->n_ref >= 1);
575 e->n_ref--;
576
577 if (e->n_ref <= 0)
578 event_free(e);
579
580 return NULL;
581 }
582
583 static bool event_pid_changed(sd_event *e) {
584 assert(e);
585
586 /* We don't support people creating an event loop and keeping
587 * it around over a fork(). Let's complain. */
588
589 return e->original_pid != getpid_cached();
590 }
591
592 static void source_io_unregister(sd_event_source *s) {
593 int r;
594
595 assert(s);
596 assert(s->type == SOURCE_IO);
597
598 if (event_pid_changed(s->event))
599 return;
600
601 if (!s->io.registered)
602 return;
603
604 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
605 if (r < 0)
606 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
607 strna(s->description), event_source_type_to_string(s->type));
608
609 s->io.registered = false;
610 }
611
612 static int source_io_register(
613 sd_event_source *s,
614 int enabled,
615 uint32_t events) {
616
617 struct epoll_event ev;
618 int r;
619
620 assert(s);
621 assert(s->type == SOURCE_IO);
622 assert(enabled != SD_EVENT_OFF);
623
624 ev = (struct epoll_event) {
625 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
626 .data.ptr = s,
627 };
628
629 if (s->io.registered)
630 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
631 else
632 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
633 if (r < 0)
634 return -errno;
635
636 s->io.registered = true;
637
638 return 0;
639 }
640
641 static clockid_t event_source_type_to_clock(EventSourceType t) {
642
643 switch (t) {
644
645 case SOURCE_TIME_REALTIME:
646 return CLOCK_REALTIME;
647
648 case SOURCE_TIME_BOOTTIME:
649 return CLOCK_BOOTTIME;
650
651 case SOURCE_TIME_MONOTONIC:
652 return CLOCK_MONOTONIC;
653
654 case SOURCE_TIME_REALTIME_ALARM:
655 return CLOCK_REALTIME_ALARM;
656
657 case SOURCE_TIME_BOOTTIME_ALARM:
658 return CLOCK_BOOTTIME_ALARM;
659
660 default:
661 return (clockid_t) -1;
662 }
663 }
664
665 static EventSourceType clock_to_event_source_type(clockid_t clock) {
666
667 switch (clock) {
668
669 case CLOCK_REALTIME:
670 return SOURCE_TIME_REALTIME;
671
672 case CLOCK_BOOTTIME:
673 return SOURCE_TIME_BOOTTIME;
674
675 case CLOCK_MONOTONIC:
676 return SOURCE_TIME_MONOTONIC;
677
678 case CLOCK_REALTIME_ALARM:
679 return SOURCE_TIME_REALTIME_ALARM;
680
681 case CLOCK_BOOTTIME_ALARM:
682 return SOURCE_TIME_BOOTTIME_ALARM;
683
684 default:
685 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
686 }
687 }
688
689 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
690 assert(e);
691
692 switch (t) {
693
694 case SOURCE_TIME_REALTIME:
695 return &e->realtime;
696
697 case SOURCE_TIME_BOOTTIME:
698 return &e->boottime;
699
700 case SOURCE_TIME_MONOTONIC:
701 return &e->monotonic;
702
703 case SOURCE_TIME_REALTIME_ALARM:
704 return &e->realtime_alarm;
705
706 case SOURCE_TIME_BOOTTIME_ALARM:
707 return &e->boottime_alarm;
708
709 default:
710 return NULL;
711 }
712 }
713
714 static int event_make_signal_data(
715 sd_event *e,
716 int sig,
717 struct signal_data **ret) {
718
719 struct epoll_event ev;
720 struct signal_data *d;
721 bool added = false;
722 sigset_t ss_copy;
723 int64_t priority;
724 int r;
725
726 assert(e);
727
728 if (event_pid_changed(e))
729 return -ECHILD;
730
731 if (e->signal_sources && e->signal_sources[sig])
732 priority = e->signal_sources[sig]->priority;
733 else
734 priority = SD_EVENT_PRIORITY_NORMAL;
735
736 d = hashmap_get(e->signal_data, &priority);
737 if (d) {
738 if (sigismember(&d->sigset, sig) > 0) {
739 if (ret)
740 *ret = d;
741 return 0;
742 }
743 } else {
744 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
745 if (r < 0)
746 return r;
747
748 d = new(struct signal_data, 1);
749 if (!d)
750 return -ENOMEM;
751
752 *d = (struct signal_data) {
753 .wakeup = WAKEUP_SIGNAL_DATA,
754 .fd = -1,
755 .priority = priority,
756 };
757
758 r = hashmap_put(e->signal_data, &d->priority, d);
759 if (r < 0) {
760 free(d);
761 return r;
762 }
763
764 added = true;
765 }
766
767 ss_copy = d->sigset;
768 assert_se(sigaddset(&ss_copy, sig) >= 0);
769
770 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
771 if (r < 0) {
772 r = -errno;
773 goto fail;
774 }
775
776 d->sigset = ss_copy;
777
778 if (d->fd >= 0) {
779 if (ret)
780 *ret = d;
781 return 0;
782 }
783
784 d->fd = fd_move_above_stdio(r);
785
786 ev = (struct epoll_event) {
787 .events = EPOLLIN,
788 .data.ptr = d,
789 };
790
791 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
792 if (r < 0) {
793 r = -errno;
794 goto fail;
795 }
796
797 if (ret)
798 *ret = d;
799
800 return 0;
801
802 fail:
803 if (added) {
804 d->fd = safe_close(d->fd);
805 hashmap_remove(e->signal_data, &d->priority);
806 free(d);
807 }
808
809 return r;
810 }
811
812 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
813 assert(e);
814 assert(d);
815
816 /* Turns off the specified signal in the signal data
817          * object. If the signal mask of the object becomes empty
818          * as a result, the object is removed entirely. */
819
820 if (sigismember(&d->sigset, sig) == 0)
821 return;
822
823 assert_se(sigdelset(&d->sigset, sig) >= 0);
824
825 if (sigisemptyset(&d->sigset)) {
826
827                 /* If the mask is now all-zero we can get rid of the structure */
828 hashmap_remove(e->signal_data, &d->priority);
829 safe_close(d->fd);
830 free(d);
831 return;
832 }
833
834 assert(d->fd >= 0);
835
836 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
837 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
838 }
839
840 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
841 struct signal_data *d;
842 static const int64_t zero_priority = 0;
843
844 assert(e);
845
846 /* Rechecks if the specified signal is still something we are
847 * interested in. If not, we'll unmask it, and possibly drop
848 * the signalfd for it. */
849
850 if (sig == SIGCHLD &&
851 e->n_enabled_child_sources > 0)
852 return;
853
854 if (e->signal_sources &&
855 e->signal_sources[sig] &&
856 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
857 return;
858
859 /*
860 * The specified signal might be enabled in three different queues:
861 *
862 * 1) the one that belongs to the priority passed (if it is non-NULL)
863 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
864 * 3) the 0 priority (to cover the SIGCHLD case)
865 *
866 * Hence, let's remove it from all three here.
867 */
868
869 if (priority) {
870 d = hashmap_get(e->signal_data, priority);
871 if (d)
872 event_unmask_signal_data(e, d, sig);
873 }
874
875 if (e->signal_sources && e->signal_sources[sig]) {
876 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
877 if (d)
878 event_unmask_signal_data(e, d, sig);
879 }
880
881 d = hashmap_get(e->signal_data, &zero_priority);
882 if (d)
883 event_unmask_signal_data(e, d, sig);
884 }
885
886 static void source_disconnect(sd_event_source *s) {
887 sd_event *event;
888
889 assert(s);
890
891 if (!s->event)
892 return;
893
894 assert(s->event->n_sources > 0);
895
896 switch (s->type) {
897
898 case SOURCE_IO:
899 if (s->io.fd >= 0)
900 source_io_unregister(s);
901
902 break;
903
904 case SOURCE_TIME_REALTIME:
905 case SOURCE_TIME_BOOTTIME:
906 case SOURCE_TIME_MONOTONIC:
907 case SOURCE_TIME_REALTIME_ALARM:
908 case SOURCE_TIME_BOOTTIME_ALARM: {
909 struct clock_data *d;
910
911 d = event_get_clock_data(s->event, s->type);
912 assert(d);
913
914 prioq_remove(d->earliest, s, &s->time.earliest_index);
915 prioq_remove(d->latest, s, &s->time.latest_index);
916 d->needs_rearm = true;
917 break;
918 }
919
920 case SOURCE_SIGNAL:
921 if (s->signal.sig > 0) {
922
923 if (s->event->signal_sources)
924 s->event->signal_sources[s->signal.sig] = NULL;
925
926 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
927 }
928
929 break;
930
931 case SOURCE_CHILD:
932 if (s->child.pid > 0) {
933 if (s->enabled != SD_EVENT_OFF) {
934 assert(s->event->n_enabled_child_sources > 0);
935 s->event->n_enabled_child_sources--;
936 }
937
938 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
939 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
940 }
941
942 break;
943
944 case SOURCE_DEFER:
945 /* nothing */
946 break;
947
948 case SOURCE_POST:
949 set_remove(s->event->post_sources, s);
950 break;
951
952 case SOURCE_EXIT:
953 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
954 break;
955
956 case SOURCE_INOTIFY: {
957 struct inode_data *inode_data;
958
959 inode_data = s->inotify.inode_data;
960 if (inode_data) {
961 struct inotify_data *inotify_data;
962 assert_se(inotify_data = inode_data->inotify_data);
963
964 /* Detach this event source from the inode object */
965 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
966 s->inotify.inode_data = NULL;
967
968 if (s->pending) {
969 assert(inotify_data->n_pending > 0);
970 inotify_data->n_pending--;
971 }
972
973                         /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
974                          * continues to be watched. That's because inotify doesn't really have an API for that: we
975                          * can only change watch masks with access to the original inode either by fd or by path. But
976                          * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
977                          * continuously and keeping the mount busy, which we can't really do. We could reconstruct the
978 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
979 * there), but given the need for open_by_handle_at() which is privileged and not universally
980 * available this would be quite an incomplete solution. Hence we go the other way, leave the
981 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
982 * anymore after reception. Yes, this sucks, but … Linux … */
983
984 /* Maybe release the inode data (and its inotify) */
985 event_gc_inode_data(s->event, inode_data);
986 }
987
988 break;
989 }
990
991 default:
992 assert_not_reached("Wut? I shouldn't exist.");
993 }
994
995 if (s->pending)
996 prioq_remove(s->event->pending, s, &s->pending_index);
997
998 if (s->prepare)
999 prioq_remove(s->event->prepare, s, &s->prepare_index);
1000
1001 event = s->event;
1002
1003 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
1004 s->event = NULL;
1005 LIST_REMOVE(sources, event->sources, s);
1006 event->n_sources--;
1007
1008 if (!s->floating)
1009 sd_event_unref(event);
1010 }
1011
1012 static void source_free(sd_event_source *s) {
1013 assert(s);
1014
1015 source_disconnect(s);
1016
1017 if (s->type == SOURCE_IO && s->io.owned)
1018 s->io.fd = safe_close(s->io.fd);
1019
1020 if (s->destroy_callback)
1021 s->destroy_callback(s->userdata);
1022
1023 free(s->description);
1024 free(s);
1025 }
1026
1027 static int source_set_pending(sd_event_source *s, bool b) {
1028 int r;
1029
1030 assert(s);
1031 assert(s->type != SOURCE_EXIT);
1032
1033 if (s->pending == b)
1034 return 0;
1035
1036 s->pending = b;
1037
1038 if (b) {
1039 s->pending_iteration = s->event->iteration;
1040
1041 r = prioq_put(s->event->pending, s, &s->pending_index);
1042 if (r < 0) {
1043 s->pending = false;
1044 return r;
1045 }
1046 } else
1047 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1048
1049 if (EVENT_SOURCE_IS_TIME(s->type)) {
1050 struct clock_data *d;
1051
1052 d = event_get_clock_data(s->event, s->type);
1053 assert(d);
1054
1055 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1056 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1057 d->needs_rearm = true;
1058 }
1059
1060 if (s->type == SOURCE_SIGNAL && !b) {
1061 struct signal_data *d;
1062
1063 d = hashmap_get(s->event->signal_data, &s->priority);
1064 if (d && d->current == s)
1065 d->current = NULL;
1066 }
1067
1068 if (s->type == SOURCE_INOTIFY) {
1069
1070 assert(s->inotify.inode_data);
1071 assert(s->inotify.inode_data->inotify_data);
1072
1073 if (b)
1074 s->inotify.inode_data->inotify_data->n_pending ++;
1075 else {
1076 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1077 s->inotify.inode_data->inotify_data->n_pending --;
1078 }
1079 }
1080
1081 return 0;
1082 }
1083
1084 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1085 sd_event_source *s;
1086
1087 assert(e);
1088
1089 s = new(sd_event_source, 1);
1090 if (!s)
1091 return NULL;
1092
1093 *s = (struct sd_event_source) {
1094 .n_ref = 1,
1095 .event = e,
1096 .floating = floating,
1097 .type = type,
1098 .pending_index = PRIOQ_IDX_NULL,
1099 .prepare_index = PRIOQ_IDX_NULL,
1100 };
1101
1102 if (!floating)
1103 sd_event_ref(e);
1104
1105 LIST_PREPEND(sources, e->sources, s);
1106 e->n_sources++;
1107
1108 return s;
1109 }
1110
1111 _public_ int sd_event_add_io(
1112 sd_event *e,
1113 sd_event_source **ret,
1114 int fd,
1115 uint32_t events,
1116 sd_event_io_handler_t callback,
1117 void *userdata) {
1118
1119 sd_event_source *s;
1120 int r;
1121
1122 assert_return(e, -EINVAL);
1123 assert_return(e = event_resolve(e), -ENOPKG);
1124 assert_return(fd >= 0, -EBADF);
1125 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1126 assert_return(callback, -EINVAL);
1127 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1128 assert_return(!event_pid_changed(e), -ECHILD);
1129
1130 s = source_new(e, !ret, SOURCE_IO);
1131 if (!s)
1132 return -ENOMEM;
1133
1134 s->wakeup = WAKEUP_EVENT_SOURCE;
1135 s->io.fd = fd;
1136 s->io.events = events;
1137 s->io.callback = callback;
1138 s->userdata = userdata;
1139 s->enabled = SD_EVENT_ON;
1140
1141 r = source_io_register(s, s->enabled, events);
1142 if (r < 0) {
1143 source_free(s);
1144 return r;
1145 }
1146
1147 if (ret)
1148 *ret = s;
1149
1150 return 0;
1151 }
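/* Usage sketch (assumption: 'connection_fd' is a non-blocking fd owned by the caller):
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     // read from fd here
 *             }
 *             return 0;
 *     }
 *
 *     sd_event_source *src = NULL;
 *     r = sd_event_add_io(e, &src, connection_fd, EPOLLIN, on_io, NULL);
 *
 * The new source starts out enabled (SD_EVENT_ON); passing NULL instead of &src creates a
 * "floating" source owned by the event loop itself, as source_new(e, !ret, ...) above shows. */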
1152
1153 static void initialize_perturb(sd_event *e) {
1154 sd_id128_t bootid = {};
1155
1156 /* When we sleep for longer, we try to realign the wakeup to
1157            the same time within each minute/second/250ms, so that
1158 events all across the system can be coalesced into a single
1159 CPU wakeup. However, let's take some system-specific
1160 randomness for this value, so that in a network of systems
1161 with synced clocks timer events are distributed a
1162 bit. Here, we calculate a perturbation usec offset from the
1163 boot ID. */
1164
1165 if (_likely_(e->perturb != USEC_INFINITY))
1166 return;
1167
1168 if (sd_id128_get_boot(&bootid) >= 0)
1169 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1170 }
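/* Hedged illustration of the idea (the actual alignment happens in the timer-arming code further
 * down in this file; the helper below is hypothetical): a deadline at least a minute away could be
 * snapped to a per-boot sub-minute phase, so that timers on one machine coalesce while machines
 * with synced clocks still wake up at slightly different offsets:
 *
 *     static usec_t align_to_minute(usec_t deadline, usec_t perturb) {
 *             usec_t c = (deadline / USEC_PER_MINUTE) * USEC_PER_MINUTE + perturb;
 *             return c > deadline ? c - USEC_PER_MINUTE : c;   // never move past the deadline
 *     }
 */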
1171
1172 static int event_setup_timer_fd(
1173 sd_event *e,
1174 struct clock_data *d,
1175 clockid_t clock) {
1176
1177 struct epoll_event ev;
1178 int r, fd;
1179
1180 assert(e);
1181 assert(d);
1182
1183 if (_likely_(d->fd >= 0))
1184 return 0;
1185
1186 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1187 if (fd < 0)
1188 return -errno;
1189
1190 fd = fd_move_above_stdio(fd);
1191
1192 ev = (struct epoll_event) {
1193 .events = EPOLLIN,
1194 .data.ptr = d,
1195 };
1196
1197 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1198 if (r < 0) {
1199 safe_close(fd);
1200 return -errno;
1201 }
1202
1203 d->fd = fd;
1204 return 0;
1205 }
1206
1207 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1208 assert(s);
1209
1210 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1211 }
1212
1213 _public_ int sd_event_add_time(
1214 sd_event *e,
1215 sd_event_source **ret,
1216 clockid_t clock,
1217 uint64_t usec,
1218 uint64_t accuracy,
1219 sd_event_time_handler_t callback,
1220 void *userdata) {
1221
1222 EventSourceType type;
1223 sd_event_source *s;
1224 struct clock_data *d;
1225 int r;
1226
1227 assert_return(e, -EINVAL);
1228 assert_return(e = event_resolve(e), -ENOPKG);
1229 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1230 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1231 assert_return(!event_pid_changed(e), -ECHILD);
1232
1233 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1234 return -EOPNOTSUPP;
1235
1236 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1237 if (type < 0)
1238 return -EOPNOTSUPP;
1239
1240 if (!callback)
1241 callback = time_exit_callback;
1242
1243 d = event_get_clock_data(e, type);
1244 assert(d);
1245
1246 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1247 if (r < 0)
1248 return r;
1249
1250 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1251 if (r < 0)
1252 return r;
1253
1254 if (d->fd < 0) {
1255 r = event_setup_timer_fd(e, d, clock);
1256 if (r < 0)
1257 return r;
1258 }
1259
1260 s = source_new(e, !ret, type);
1261 if (!s)
1262 return -ENOMEM;
1263
1264 s->time.next = usec;
1265 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1266 s->time.callback = callback;
1267 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1268 s->userdata = userdata;
1269 s->enabled = SD_EVENT_ONESHOT;
1270
1271 d->needs_rearm = true;
1272
1273 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1274 if (r < 0)
1275 goto fail;
1276
1277 r = prioq_put(d->latest, s, &s->time.latest_index);
1278 if (r < 0)
1279 goto fail;
1280
1281 if (ret)
1282 *ret = s;
1283
1284 return 0;
1285
1286 fail:
1287 source_free(s);
1288 return r;
1289 }
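/* Usage sketch for a timer 5 seconds from now (assumes sd_event_now() from sd-event.h):
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             // timer elapsed
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r >= 0)
 *             r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                   now_usec + 5 * USEC_PER_SEC,
 *                                   0,                 // accuracy 0 selects DEFAULT_ACCURACY_USEC
 *                                   on_time, NULL);
 *
 * The source is created as SD_EVENT_ONESHOT, i.e. it fires once and is then disabled. */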
1290
1291 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1292 assert(s);
1293
1294 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1295 }
1296
1297 _public_ int sd_event_add_signal(
1298 sd_event *e,
1299 sd_event_source **ret,
1300 int sig,
1301 sd_event_signal_handler_t callback,
1302 void *userdata) {
1303
1304 sd_event_source *s;
1305 struct signal_data *d;
1306 sigset_t ss;
1307 int r;
1308
1309 assert_return(e, -EINVAL);
1310 assert_return(e = event_resolve(e), -ENOPKG);
1311 assert_return(SIGNAL_VALID(sig), -EINVAL);
1312 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1313 assert_return(!event_pid_changed(e), -ECHILD);
1314
1315 if (!callback)
1316 callback = signal_exit_callback;
1317
1318 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1319 if (r != 0)
1320 return -r;
1321
1322 if (!sigismember(&ss, sig))
1323 return -EBUSY;
1324
1325 if (!e->signal_sources) {
1326 e->signal_sources = new0(sd_event_source*, _NSIG);
1327 if (!e->signal_sources)
1328 return -ENOMEM;
1329 } else if (e->signal_sources[sig])
1330 return -EBUSY;
1331
1332 s = source_new(e, !ret, SOURCE_SIGNAL);
1333 if (!s)
1334 return -ENOMEM;
1335
1336 s->signal.sig = sig;
1337 s->signal.callback = callback;
1338 s->userdata = userdata;
1339 s->enabled = SD_EVENT_ON;
1340
1341 e->signal_sources[sig] = s;
1342
1343 r = event_make_signal_data(e, sig, &d);
1344 if (r < 0) {
1345 source_free(s);
1346 return r;
1347 }
1348
1349 /* Use the signal name as description for the event source by default */
1350 (void) sd_event_source_set_description(s, signal_to_string(sig));
1351
1352 if (ret)
1353 *ret = s;
1354
1355 return 0;
1356 }
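/* Usage sketch: the signal must already be blocked in the calling thread, otherwise -EBUSY is
 * returned above; with a NULL callback the default handler just exits the loop:
 *
 *     sigset_t mask;
 *     int k;
 *
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigaddset(&mask, SIGINT);
 *     k = pthread_sigmask(SIG_BLOCK, &mask, NULL);
 *     if (k != 0)
 *             return -k;
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *     if (r >= 0)
 *             r = sd_event_add_signal(e, NULL, SIGINT, NULL, NULL);
 */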
1357
1358 _public_ int sd_event_add_child(
1359 sd_event *e,
1360 sd_event_source **ret,
1361 pid_t pid,
1362 int options,
1363 sd_event_child_handler_t callback,
1364 void *userdata) {
1365
1366 sd_event_source *s;
1367 int r;
1368
1369 assert_return(e, -EINVAL);
1370 assert_return(e = event_resolve(e), -ENOPKG);
1371 assert_return(pid > 1, -EINVAL);
1372 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1373 assert_return(options != 0, -EINVAL);
1374 assert_return(callback, -EINVAL);
1375 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1376 assert_return(!event_pid_changed(e), -ECHILD);
1377
1378 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1379 if (r < 0)
1380 return r;
1381
1382 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1383 return -EBUSY;
1384
1385 s = source_new(e, !ret, SOURCE_CHILD);
1386 if (!s)
1387 return -ENOMEM;
1388
1389 s->child.pid = pid;
1390 s->child.options = options;
1391 s->child.callback = callback;
1392 s->userdata = userdata;
1393 s->enabled = SD_EVENT_ONESHOT;
1394
1395 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1396 if (r < 0) {
1397 source_free(s);
1398 return r;
1399 }
1400
1401 e->n_enabled_child_sources++;
1402
1403 r = event_make_signal_data(e, SIGCHLD, NULL);
1404 if (r < 0) {
1405 e->n_enabled_child_sources--;
1406 source_free(s);
1407 return r;
1408 }
1409
1410 e->need_process_child = true;
1411
1412 if (ret)
1413 *ret = s;
1414
1415 return 0;
1416 }
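/* Usage sketch (assumption: 'child_pid' was just forked by the caller; SIGCHLD has to be blocked,
 * as in the sd_event_add_signal() example, so the SIGCHLD signalfd set up above can see it):
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_code / si->si_status describe how the child terminated
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL);
 */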
1417
1418 _public_ int sd_event_add_defer(
1419 sd_event *e,
1420 sd_event_source **ret,
1421 sd_event_handler_t callback,
1422 void *userdata) {
1423
1424 sd_event_source *s;
1425 int r;
1426
1427 assert_return(e, -EINVAL);
1428 assert_return(e = event_resolve(e), -ENOPKG);
1429 assert_return(callback, -EINVAL);
1430 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1431 assert_return(!event_pid_changed(e), -ECHILD);
1432
1433 s = source_new(e, !ret, SOURCE_DEFER);
1434 if (!s)
1435 return -ENOMEM;
1436
1437 s->defer.callback = callback;
1438 s->userdata = userdata;
1439 s->enabled = SD_EVENT_ONESHOT;
1440
1441 r = source_set_pending(s, true);
1442 if (r < 0) {
1443 source_free(s);
1444 return r;
1445 }
1446
1447 if (ret)
1448 *ret = s;
1449
1450 return 0;
1451 }
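/* Usage sketch: a defer source is marked pending right away (see source_set_pending() above) and
 * therefore runs on the next loop iteration; being SD_EVENT_ONESHOT it is then disabled until
 * explicitly re-enabled:
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             // runs once, on the next iteration
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, on_defer, NULL);
 */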
1452
1453 _public_ int sd_event_add_post(
1454 sd_event *e,
1455 sd_event_source **ret,
1456 sd_event_handler_t callback,
1457 void *userdata) {
1458
1459 sd_event_source *s;
1460 int r;
1461
1462 assert_return(e, -EINVAL);
1463 assert_return(e = event_resolve(e), -ENOPKG);
1464 assert_return(callback, -EINVAL);
1465 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1466 assert_return(!event_pid_changed(e), -ECHILD);
1467
1468 r = set_ensure_allocated(&e->post_sources, NULL);
1469 if (r < 0)
1470 return r;
1471
1472 s = source_new(e, !ret, SOURCE_POST);
1473 if (!s)
1474 return -ENOMEM;
1475
1476 s->post.callback = callback;
1477 s->userdata = userdata;
1478 s->enabled = SD_EVENT_ON;
1479
1480 r = set_put(e->post_sources, s);
1481 if (r < 0) {
1482 source_free(s);
1483 return r;
1484 }
1485
1486 if (ret)
1487 *ret = s;
1488
1489 return 0;
1490 }
1491
1492 _public_ int sd_event_add_exit(
1493 sd_event *e,
1494 sd_event_source **ret,
1495 sd_event_handler_t callback,
1496 void *userdata) {
1497
1498 sd_event_source *s;
1499 int r;
1500
1501 assert_return(e, -EINVAL);
1502 assert_return(e = event_resolve(e), -ENOPKG);
1503 assert_return(callback, -EINVAL);
1504 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1505 assert_return(!event_pid_changed(e), -ECHILD);
1506
1507 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1508 if (r < 0)
1509 return r;
1510
1511 s = source_new(e, !ret, SOURCE_EXIT);
1512 if (!s)
1513 return -ENOMEM;
1514
1515 s->exit.callback = callback;
1516 s->userdata = userdata;
1517 s->exit.prioq_index = PRIOQ_IDX_NULL;
1518 s->enabled = SD_EVENT_ONESHOT;
1519
1520 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1521 if (r < 0) {
1522 source_free(s);
1523 return r;
1524 }
1525
1526 if (ret)
1527 *ret = s;
1528
1529 return 0;
1530 }
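/* Usage sketch: exit sources are dispatched when the loop shuts down after sd_event_exit(), lower
 * priority values first (see exit_prioq_compare() above), which makes them useful for ordered
 * cleanup:
 *
 *     static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *             // release resources tied to the loop
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
 */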
1531
1532 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1533 assert(e);
1534
1535 if (!d)
1536 return;
1537
1538 assert(hashmap_isempty(d->inodes));
1539 assert(hashmap_isempty(d->wd));
1540
1541 if (d->buffer_filled > 0)
1542 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1543
1544 hashmap_free(d->inodes);
1545 hashmap_free(d->wd);
1546
1547 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1548
1549 if (d->fd >= 0) {
1550 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1551 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1552
1553 safe_close(d->fd);
1554 }
1555 free(d);
1556 }
1557
1558 static int event_make_inotify_data(
1559 sd_event *e,
1560 int64_t priority,
1561 struct inotify_data **ret) {
1562
1563 _cleanup_close_ int fd = -1;
1564 struct inotify_data *d;
1565 struct epoll_event ev;
1566 int r;
1567
1568 assert(e);
1569
1570 d = hashmap_get(e->inotify_data, &priority);
1571 if (d) {
1572 if (ret)
1573 *ret = d;
1574 return 0;
1575 }
1576
1577         fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1578 if (fd < 0)
1579 return -errno;
1580
1581 fd = fd_move_above_stdio(fd);
1582
1583 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1584 if (r < 0)
1585 return r;
1586
1587 d = new(struct inotify_data, 1);
1588 if (!d)
1589 return -ENOMEM;
1590
1591 *d = (struct inotify_data) {
1592 .wakeup = WAKEUP_INOTIFY_DATA,
1593 .fd = TAKE_FD(fd),
1594 .priority = priority,
1595 };
1596
1597 r = hashmap_put(e->inotify_data, &d->priority, d);
1598 if (r < 0) {
1599 d->fd = safe_close(d->fd);
1600 free(d);
1601 return r;
1602 }
1603
1604 ev = (struct epoll_event) {
1605 .events = EPOLLIN,
1606 .data.ptr = d,
1607 };
1608
1609 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1610 r = -errno;
1611 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1612 * remove the fd from the epoll first, which we don't want as we couldn't
1613 * add it in the first place. */
1614 event_free_inotify_data(e, d);
1615 return r;
1616 }
1617
1618 if (ret)
1619 *ret = d;
1620
1621 return 1;
1622 }
1623
1624 static int inode_data_compare(const void *a, const void *b) {
1625 const struct inode_data *x = a, *y = b;
1626
1627 assert(x);
1628 assert(y);
1629
1630 if (x->dev < y->dev)
1631 return -1;
1632 if (x->dev > y->dev)
1633 return 1;
1634
1635 if (x->ino < y->ino)
1636 return -1;
1637 if (x->ino > y->ino)
1638 return 1;
1639
1640 return 0;
1641 }
1642
1643 static void inode_data_hash_func(const void *p, struct siphash *state) {
1644 const struct inode_data *d = p;
1645
1646 assert(p);
1647
1648 siphash24_compress(&d->dev, sizeof(d->dev), state);
1649 siphash24_compress(&d->ino, sizeof(d->ino), state);
1650 }
1651
1652 const struct hash_ops inode_data_hash_ops = {
1653 .hash = inode_data_hash_func,
1654 .compare = inode_data_compare
1655 };
1656
1657 static void event_free_inode_data(
1658 sd_event *e,
1659 struct inode_data *d) {
1660
1661 assert(e);
1662
1663 if (!d)
1664 return;
1665
1666 assert(!d->event_sources);
1667
1668 if (d->fd >= 0) {
1669 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1670 safe_close(d->fd);
1671 }
1672
1673 if (d->inotify_data) {
1674
1675 if (d->wd >= 0) {
1676 if (d->inotify_data->fd >= 0) {
1677 /* So here's a problem. At the time this runs the watch descriptor might already be
1678                          * invalidated, because an IN_IGNORED event might be queued at the very moment we enter
1679                          * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's quite
1680                          * likely to happen. */
1681
1682 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1683 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1684 }
1685
1686 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1687 }
1688
1689 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1690 }
1691
1692 free(d);
1693 }
1694
1695 static void event_gc_inode_data(
1696 sd_event *e,
1697 struct inode_data *d) {
1698
1699 struct inotify_data *inotify_data;
1700
1701 assert(e);
1702
1703 if (!d)
1704 return;
1705
1706 if (d->event_sources)
1707 return;
1708
1709 inotify_data = d->inotify_data;
1710 event_free_inode_data(e, d);
1711
1712 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1713 event_free_inotify_data(e, inotify_data);
1714 }
1715
1716 static int event_make_inode_data(
1717 sd_event *e,
1718 struct inotify_data *inotify_data,
1719 dev_t dev,
1720 ino_t ino,
1721 struct inode_data **ret) {
1722
1723 struct inode_data *d, key;
1724 int r;
1725
1726 assert(e);
1727 assert(inotify_data);
1728
1729 key = (struct inode_data) {
1730 .ino = ino,
1731 .dev = dev,
1732 };
1733
1734 d = hashmap_get(inotify_data->inodes, &key);
1735 if (d) {
1736 if (ret)
1737 *ret = d;
1738
1739 return 0;
1740 }
1741
1742 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1743 if (r < 0)
1744 return r;
1745
1746 d = new(struct inode_data, 1);
1747 if (!d)
1748 return -ENOMEM;
1749
1750 *d = (struct inode_data) {
1751 .dev = dev,
1752 .ino = ino,
1753 .wd = -1,
1754 .fd = -1,
1755 .inotify_data = inotify_data,
1756 };
1757
1758 r = hashmap_put(inotify_data->inodes, d, d);
1759 if (r < 0) {
1760 free(d);
1761 return r;
1762 }
1763
1764 if (ret)
1765 *ret = d;
1766
1767 return 1;
1768 }
1769
1770 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1771 bool excl_unlink = true;
1772 uint32_t combined = 0;
1773 sd_event_source *s;
1774
1775 assert(d);
1776
1777 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1778 * the IN_EXCL_UNLINK flag is ANDed instead.
1779 *
1780          * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1781          * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1782          * API for that. Hence we need to subscribe to the maximum mask we might ever be interested in, and suppress
1783          * events we don't care about client-side. */
1784
1785 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1786
1787 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1788 excl_unlink = false;
1789
1790 combined |= s->inotify.mask;
1791 }
1792
1793 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1794 }
1795
1796 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1797 uint32_t combined_mask;
1798 int wd, r;
1799
1800 assert(d);
1801 assert(d->fd >= 0);
1802
1803 combined_mask = inode_data_determine_mask(d);
1804
1805 if (d->wd >= 0 && combined_mask == d->combined_mask)
1806 return 0;
1807
1808 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1809 if (r < 0)
1810 return r;
1811
1812 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1813 if (wd < 0)
1814 return -errno;
1815
1816 if (d->wd < 0) {
1817 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1818 if (r < 0) {
1819 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1820 return r;
1821 }
1822
1823 d->wd = wd;
1824
1825 } else if (d->wd != wd) {
1826
1827 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1828 (void) inotify_rm_watch(d->fd, wd);
1829 return -EINVAL;
1830 }
1831
1832 d->combined_mask = combined_mask;
1833 return 1;
1834 }
1835
1836 _public_ int sd_event_add_inotify(
1837 sd_event *e,
1838 sd_event_source **ret,
1839 const char *path,
1840 uint32_t mask,
1841 sd_event_inotify_handler_t callback,
1842 void *userdata) {
1843
1844 bool rm_inotify = false, rm_inode = false;
1845 struct inotify_data *inotify_data = NULL;
1846 struct inode_data *inode_data = NULL;
1847 _cleanup_close_ int fd = -1;
1848 sd_event_source *s;
1849 struct stat st;
1850 int r;
1851
1852 assert_return(e, -EINVAL);
1853 assert_return(e = event_resolve(e), -ENOPKG);
1854 assert_return(path, -EINVAL);
1855 assert_return(callback, -EINVAL);
1856 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1857 assert_return(!event_pid_changed(e), -ECHILD);
1858
1859 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1860 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1861          * callers can't use it themselves. */
1862 if (mask & IN_MASK_ADD)
1863 return -EINVAL;
1864
1865 fd = open(path, O_PATH|O_CLOEXEC|
1866 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1867 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1868 if (fd < 0)
1869 return -errno;
1870
1871 if (fstat(fd, &st) < 0)
1872 return -errno;
1873
1874 s = source_new(e, !ret, SOURCE_INOTIFY);
1875 if (!s)
1876 return -ENOMEM;
1877
1878 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1879 s->inotify.mask = mask;
1880 s->inotify.callback = callback;
1881 s->userdata = userdata;
1882
1883 /* Allocate an inotify object for this priority, and an inode object within it */
1884 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1885 if (r < 0)
1886 goto fail;
1887 rm_inotify = r > 0;
1888
1889 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1890 if (r < 0)
1891 goto fail;
1892 rm_inode = r > 0;
1893
1894         /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the event source
1895          * can still be changed until then; changing it requires the original inode. */
1896 if (inode_data->fd < 0) {
1897 inode_data->fd = TAKE_FD(fd);
1898 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1899 }
1900
1901 /* Link our event source to the inode data object */
1902 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1903 s->inotify.inode_data = inode_data;
1904
1905 rm_inode = rm_inotify = false;
1906
1907 /* Actually realize the watch now */
1908 r = inode_data_realize_watch(e, inode_data);
1909 if (r < 0)
1910 goto fail;
1911
1912 (void) sd_event_source_set_description(s, path);
1913
1914 if (ret)
1915 *ret = s;
1916
1917 return 0;
1918
1919 fail:
1920 source_free(s);
1921
1922 if (rm_inode)
1923 event_free_inode_data(e, inode_data);
1924
1925 if (rm_inotify)
1926 event_free_inotify_data(e, inotify_data);
1927
1928 return r;
1929 }
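/* Usage sketch (assumption: "/run/myservice" is a directory to watch; mask values are the usual
 * <sys/inotify.h> constants, with IN_MASK_ADD rejected as enforced above):
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->mask & IN_CREATE) {
 *                     // a file appeared in the watched directory
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/myservice",
 *                              IN_CREATE|IN_DELETE|IN_ONLYDIR, on_inotify, NULL);
 *
 * Watches on the same inode are coalesced per priority, so several event sources may end up
 * sharing a single kernel watch descriptor. */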
1930
1931 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1932
1933 if (!s)
1934 return NULL;
1935
1936 assert(s->n_ref >= 1);
1937 s->n_ref++;
1938
1939 return s;
1940 }
1941
1942 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1943
1944 if (!s)
1945 return NULL;
1946
1947 assert(s->n_ref >= 1);
1948 s->n_ref--;
1949
1950 if (s->n_ref <= 0) {
1951 /* Here's a special hack: when we are called from a
1952 * dispatch handler we won't free the event source
1953 * immediately, but we will detach the fd from the
1954 * epoll. This way it is safe for the caller to unref
1955 * the event source and immediately close the fd, but
1956 * we still retain a valid event source object after
1957 * the callback. */
1958
1959 if (s->dispatching) {
1960 if (s->type == SOURCE_IO)
1961 source_io_unregister(s);
1962
1963 source_disconnect(s);
1964 } else
1965 source_free(s);
1966 }
1967
1968 return NULL;
1969 }
1970
1971 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1972 assert_return(s, -EINVAL);
1973 assert_return(!event_pid_changed(s->event), -ECHILD);
1974
1975 return free_and_strdup(&s->description, description);
1976 }
1977
1978 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1979 assert_return(s, -EINVAL);
1980 assert_return(description, -EINVAL);
1981 assert_return(s->description, -ENXIO);
1982 assert_return(!event_pid_changed(s->event), -ECHILD);
1983
1984 *description = s->description;
1985 return 0;
1986 }
1987
1988 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1989 assert_return(s, NULL);
1990
1991 return s->event;
1992 }
1993
1994 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1995 assert_return(s, -EINVAL);
1996 assert_return(s->type != SOURCE_EXIT, -EDOM);
1997 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1998 assert_return(!event_pid_changed(s->event), -ECHILD);
1999
2000 return s->pending;
2001 }
2002
2003 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2004 assert_return(s, -EINVAL);
2005 assert_return(s->type == SOURCE_IO, -EDOM);
2006 assert_return(!event_pid_changed(s->event), -ECHILD);
2007
2008 return s->io.fd;
2009 }
2010
2011 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2012 int r;
2013
2014 assert_return(s, -EINVAL);
2015 assert_return(fd >= 0, -EBADF);
2016 assert_return(s->type == SOURCE_IO, -EDOM);
2017 assert_return(!event_pid_changed(s->event), -ECHILD);
2018
2019 if (s->io.fd == fd)
2020 return 0;
2021
2022 if (s->enabled == SD_EVENT_OFF) {
2023 s->io.fd = fd;
2024 s->io.registered = false;
2025 } else {
2026 int saved_fd;
2027
2028 saved_fd = s->io.fd;
2029 assert(s->io.registered);
2030
2031 s->io.fd = fd;
2032 s->io.registered = false;
2033
2034 r = source_io_register(s, s->enabled, s->io.events);
2035 if (r < 0) {
2036 s->io.fd = saved_fd;
2037 s->io.registered = true;
2038 return r;
2039 }
2040
2041 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2042 }
2043
2044 return 0;
2045 }
2046
2047 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2048 assert_return(s, -EINVAL);
2049 assert_return(s->type == SOURCE_IO, -EDOM);
2050
2051 return s->io.owned;
2052 }
2053
2054 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2055 assert_return(s, -EINVAL);
2056 assert_return(s->type == SOURCE_IO, -EDOM);
2057
2058 s->io.owned = own;
2059 return 0;
2060 }
2061
2062 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2063 assert_return(s, -EINVAL);
2064 assert_return(events, -EINVAL);
2065 assert_return(s->type == SOURCE_IO, -EDOM);
2066 assert_return(!event_pid_changed(s->event), -ECHILD);
2067
2068 *events = s->io.events;
2069 return 0;
2070 }
2071
2072 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2073 int r;
2074
2075 assert_return(s, -EINVAL);
2076 assert_return(s->type == SOURCE_IO, -EDOM);
2077 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2078 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2079 assert_return(!event_pid_changed(s->event), -ECHILD);
2080
2081 /* edge-triggered updates are never skipped, so we can reset edges */
2082 if (s->io.events == events && !(events & EPOLLET))
2083 return 0;
2084
2085 r = source_set_pending(s, false);
2086 if (r < 0)
2087 return r;
2088
2089 if (s->enabled != SD_EVENT_OFF) {
2090 r = source_io_register(s, s->enabled, events);
2091 if (r < 0)
2092 return r;
2093 }
2094
2095 s->io.events = events;
2096
2097 return 0;
2098 }
2099
2100 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2101 assert_return(s, -EINVAL);
2102 assert_return(revents, -EINVAL);
2103 assert_return(s->type == SOURCE_IO, -EDOM);
2104 assert_return(s->pending, -ENODATA);
2105 assert_return(!event_pid_changed(s->event), -ECHILD);
2106
2107 *revents = s->io.revents;
2108 return 0;
2109 }
2110
2111 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2112 assert_return(s, -EINVAL);
2113 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2114 assert_return(!event_pid_changed(s->event), -ECHILD);
2115
2116 return s->signal.sig;
2117 }
2118
2119 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2120 assert_return(s, -EINVAL);
2121 assert_return(!event_pid_changed(s->event), -ECHILD);
2122
2123 *priority = s->priority;
2124 return 0;
2125 }
2126
2127 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2128 bool rm_inotify = false, rm_inode = false;
2129 struct inotify_data *new_inotify_data = NULL;
2130 struct inode_data *new_inode_data = NULL;
2131 int r;
2132
2133 assert_return(s, -EINVAL);
2134 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2135 assert_return(!event_pid_changed(s->event), -ECHILD);
2136
2137 if (s->priority == priority)
2138 return 0;
2139
2140 if (s->type == SOURCE_INOTIFY) {
2141 struct inode_data *old_inode_data;
2142
2143 assert(s->inotify.inode_data);
2144 old_inode_data = s->inotify.inode_data;
2145
2146                 /* We need the original fd to change the priority. If we don't have it, we can't change the priority
2147                  * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2148 * events we allow priority changes only until the first following iteration. */
2149 if (old_inode_data->fd < 0)
2150 return -EOPNOTSUPP;
2151
2152 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2153 if (r < 0)
2154 return r;
2155 rm_inotify = r > 0;
2156
2157 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2158 if (r < 0)
2159 goto fail;
2160 rm_inode = r > 0;
2161
2162 if (new_inode_data->fd < 0) {
2163 /* Duplicate the fd for the new inode object if we don't have any yet */
2164 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2165 if (new_inode_data->fd < 0) {
2166 r = -errno;
2167 goto fail;
2168 }
2169
2170 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2171 }
2172
2173 /* Move the event source to the new inode data structure */
2174 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2175 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2176 s->inotify.inode_data = new_inode_data;
2177
2178 /* Now create the new watch */
2179 r = inode_data_realize_watch(s->event, new_inode_data);
2180 if (r < 0) {
2181 /* Move it back */
2182 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2183 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2184 s->inotify.inode_data = old_inode_data;
2185 goto fail;
2186 }
2187
2188 s->priority = priority;
2189
2190 event_gc_inode_data(s->event, old_inode_data);
2191
2192 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2193 struct signal_data *old, *d;
2194
2195 /* Move us from the signalfd belonging to the old
2196 * priority to the signalfd of the new priority */
2197
2198 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2199
2200 s->priority = priority;
2201
2202 r = event_make_signal_data(s->event, s->signal.sig, &d);
2203 if (r < 0) {
2204 s->priority = old->priority;
2205 return r;
2206 }
2207
2208 event_unmask_signal_data(s->event, old, s->signal.sig);
2209 } else
2210 s->priority = priority;
2211
2212 if (s->pending)
2213 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2214
2215 if (s->prepare)
2216 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2217
2218 if (s->type == SOURCE_EXIT)
2219 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2220
2221 return 0;
2222
2223 fail:
2224 if (rm_inode)
2225 event_free_inode_data(s->event, new_inode_data);
2226
2227 if (rm_inotify)
2228 event_free_inotify_data(s->event, new_inotify_data);
2229
2230 return r;
2231 }
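
/* Example (editorial sketch, not part of the original file): priorities are plain int64_t values
 * and smaller values are dispatched first; sd-event.h offers SD_EVENT_PRIORITY_IMPORTANT,
 * SD_EVENT_PRIORITY_NORMAL and SD_EVENT_PRIORITY_IDLE as conventional anchors. Bumping a source
 * ahead of the default priority hence looks like this:
 *
 *     r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
 *     if (r < 0)
 *             return r;
 */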
2232
2233 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2234 assert_return(s, -EINVAL);
2235 assert_return(m, -EINVAL);
2236 assert_return(!event_pid_changed(s->event), -ECHILD);
2237
2238 *m = s->enabled;
2239 return 0;
2240 }
2241
2242 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2243 int r;
2244
2245 assert_return(s, -EINVAL);
2246 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2247 assert_return(!event_pid_changed(s->event), -ECHILD);
2248
2249 /* If we are dead anyway, we are fine with turning off
2250 * sources, but everything else needs to fail. */
2251 if (s->event->state == SD_EVENT_FINISHED)
2252 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2253
2254 if (s->enabled == m)
2255 return 0;
2256
2257 if (m == SD_EVENT_OFF) {
2258
2259 /* Unset the pending flag when this event source is disabled */
2260 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2261 r = source_set_pending(s, false);
2262 if (r < 0)
2263 return r;
2264 }
2265
2266 switch (s->type) {
2267
2268 case SOURCE_IO:
2269 source_io_unregister(s);
2270 s->enabled = m;
2271 break;
2272
2273 case SOURCE_TIME_REALTIME:
2274 case SOURCE_TIME_BOOTTIME:
2275 case SOURCE_TIME_MONOTONIC:
2276 case SOURCE_TIME_REALTIME_ALARM:
2277 case SOURCE_TIME_BOOTTIME_ALARM: {
2278 struct clock_data *d;
2279
2280 s->enabled = m;
2281 d = event_get_clock_data(s->event, s->type);
2282 assert(d);
2283
2284 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2285 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2286 d->needs_rearm = true;
2287 break;
2288 }
2289
2290 case SOURCE_SIGNAL:
2291 s->enabled = m;
2292
2293 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2294 break;
2295
2296 case SOURCE_CHILD:
2297 s->enabled = m;
2298
2299 assert(s->event->n_enabled_child_sources > 0);
2300 s->event->n_enabled_child_sources--;
2301
2302 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2303 break;
2304
2305 case SOURCE_EXIT:
2306 s->enabled = m;
2307 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2308 break;
2309
2310 case SOURCE_DEFER:
2311 case SOURCE_POST:
2312 case SOURCE_INOTIFY:
2313 s->enabled = m;
2314 break;
2315
2316 default:
2317 assert_not_reached("Wut? I shouldn't exist.");
2318 }
2319
2320 } else {
2321
2322 /* Unset the pending flag when this event source is enabled */
2323 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2324 r = source_set_pending(s, false);
2325 if (r < 0)
2326 return r;
2327 }
2328
2329 switch (s->type) {
2330
2331 case SOURCE_IO:
2332 r = source_io_register(s, m, s->io.events);
2333 if (r < 0)
2334 return r;
2335
2336 s->enabled = m;
2337 break;
2338
2339 case SOURCE_TIME_REALTIME:
2340 case SOURCE_TIME_BOOTTIME:
2341 case SOURCE_TIME_MONOTONIC:
2342 case SOURCE_TIME_REALTIME_ALARM:
2343 case SOURCE_TIME_BOOTTIME_ALARM: {
2344 struct clock_data *d;
2345
2346 s->enabled = m;
2347 d = event_get_clock_data(s->event, s->type);
2348 assert(d);
2349
2350 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2351 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2352 d->needs_rearm = true;
2353 break;
2354 }
2355
2356 case SOURCE_SIGNAL:
2357
2358 s->enabled = m;
2359
2360 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2361 if (r < 0) {
2362 s->enabled = SD_EVENT_OFF;
2363 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2364 return r;
2365 }
2366
2367 break;
2368
2369 case SOURCE_CHILD:
2370
2371 if (s->enabled == SD_EVENT_OFF)
2372 s->event->n_enabled_child_sources++;
2373
2374 s->enabled = m;
2375
2376 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2377 if (r < 0) {
2378 s->enabled = SD_EVENT_OFF;
2379 s->event->n_enabled_child_sources--;
2380 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2381 return r;
2382 }
2383
2384 break;
2385
2386 case SOURCE_EXIT:
2387 s->enabled = m;
2388 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2389 break;
2390
2391 case SOURCE_DEFER:
2392 case SOURCE_POST:
2393 case SOURCE_INOTIFY:
2394 s->enabled = m;
2395 break;
2396
2397 default:
2398 assert_not_reached("Wut? I shouldn't exist.");
2399 }
2400 }
2401
2402 if (s->pending)
2403 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2404
2405 if (s->prepare)
2406 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2407
2408 return 0;
2409 }
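
/* Example (editorial sketch, not part of the original file): an SD_EVENT_ONESHOT source fires once
 * and is then switched back to SD_EVENT_OFF by source_dispatch() below; a handler that wants to be
 * called again simply re-enables itself. For instance, with a defer source (which, at the time of
 * writing, starts out as SD_EVENT_ONESHOT), using a made-up handler name:
 *
 *     static int hypothetical_work(sd_event_source *s, void *userdata) {
 *             // ... do one unit of work ...
 *             return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *     }
 */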
2410
2411 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2412 assert_return(s, -EINVAL);
2413 assert_return(usec, -EINVAL);
2414 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2415 assert_return(!event_pid_changed(s->event), -ECHILD);
2416
2417 *usec = s->time.next;
2418 return 0;
2419 }
2420
2421 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2422 struct clock_data *d;
2423 int r;
2424
2425 assert_return(s, -EINVAL);
2426 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2427 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2428 assert_return(!event_pid_changed(s->event), -ECHILD);
2429
2430 r = source_set_pending(s, false);
2431 if (r < 0)
2432 return r;
2433
2434 s->time.next = usec;
2435
2436 d = event_get_clock_data(s->event, s->type);
2437 assert(d);
2438
2439 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2440 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2441 d->needs_rearm = true;
2442
2443 return 0;
2444 }
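
/* Example (editorial sketch, not part of the original file): sd_event_source_set_time() takes an
 * absolute timestamp on the source's clock, so a relative timeout is usually programmed by adding
 * an offset to the loop's cached notion of "now". Here e is assumed to be the loop the source s
 * was created on, with CLOCK_MONOTONIC:
 *
 *     uint64_t usec;
 *
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_source_set_time(s, usec + 5 * USEC_PER_SEC);
 */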
2445
2446 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2447 assert_return(s, -EINVAL);
2448 assert_return(usec, -EINVAL);
2449 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2450 assert_return(!event_pid_changed(s->event), -ECHILD);
2451
2452 *usec = s->time.accuracy;
2453 return 0;
2454 }
2455
2456 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2457 struct clock_data *d;
2458 int r;
2459
2460 assert_return(s, -EINVAL);
2461 assert_return(usec != (uint64_t) -1, -EINVAL);
2462 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2463 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2464 assert_return(!event_pid_changed(s->event), -ECHILD);
2465
2466 r = source_set_pending(s, false);
2467 if (r < 0)
2468 return r;
2469
2470 if (usec == 0)
2471 usec = DEFAULT_ACCURACY_USEC;
2472
2473 s->time.accuracy = usec;
2474
2475 d = event_get_clock_data(s->event, s->type);
2476 assert(d);
2477
2478 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2479 d->needs_rearm = true;
2480
2481 return 0;
2482 }
2483
2484 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2485 assert_return(s, -EINVAL);
2486 assert_return(clock, -EINVAL);
2487 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2488 assert_return(!event_pid_changed(s->event), -ECHILD);
2489
2490 *clock = event_source_type_to_clock(s->type);
2491 return 0;
2492 }
2493
2494 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2495 assert_return(s, -EINVAL);
2496 assert_return(pid, -EINVAL);
2497 assert_return(s->type == SOURCE_CHILD, -EDOM);
2498 assert_return(!event_pid_changed(s->event), -ECHILD);
2499
2500 *pid = s->child.pid;
2501 return 0;
2502 }
2503
2504 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2505 assert_return(s, -EINVAL);
2506 assert_return(mask, -EINVAL);
2507 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2508 assert_return(!event_pid_changed(s->event), -ECHILD);
2509
2510 *mask = s->inotify.mask;
2511 return 0;
2512 }
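
/* Example (editorial sketch, not part of the original file): inotify sources are created with
 * sd_event_add_inotify(); the mask uses the constants from <sys/inotify.h>. The path and the
 * callback name below are made up:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("Got inotify event with mask 0x%x", (unsigned) ev->mask);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &s, "/run/hypothetical", IN_CREATE|IN_CLOSE_WRITE, on_inotify, NULL);
 */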
2513
2514 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2515 int r;
2516
2517 assert_return(s, -EINVAL);
2518 assert_return(s->type != SOURCE_EXIT, -EDOM);
2519 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2520 assert_return(!event_pid_changed(s->event), -ECHILD);
2521
2522 if (s->prepare == callback)
2523 return 0;
2524
2525 if (callback && s->prepare) {
2526 s->prepare = callback;
2527 return 0;
2528 }
2529
2530 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2531 if (r < 0)
2532 return r;
2533
2534 s->prepare = callback;
2535
2536 if (callback) {
2537 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2538 if (r < 0)
2539 return r;
2540 } else
2541 prioq_remove(s->event->prepare, s, &s->prepare_index);
2542
2543 return 0;
2544 }
2545
2546 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2547 assert_return(s, NULL);
2548
2549 return s->userdata;
2550 }
2551
2552 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2553 void *ret;
2554
2555 assert_return(s, NULL);
2556
2557 ret = s->userdata;
2558 s->userdata = userdata;
2559
2560 return ret;
2561 }
2562
2563 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2564 usec_t c;
2565 assert(e);
2566 assert(a <= b);
2567
2568 if (a <= 0)
2569 return 0;
2570 if (a >= USEC_INFINITY)
2571 return USEC_INFINITY;
2572
2573 if (b <= a + 1)
2574 return a;
2575
2576 initialize_perturb(e);
2577
2578 /*
2579 Find a good time to wake up again between times a and b. We
2580 have two goals here:
2581
2582 a) We want to wake up as seldom as possible, hence prefer
2583 later times over earlier times.
2584
2585 b) But if we have to wake up, then let's make sure to
2586 dispatch as much as possible on the entire system.
2587
2588 We implement this by waking up everywhere at the same time
2589 within any given minute if we can, synchronised via the
2590 perturbation value determined from the boot ID. If we can't,
2591 then we try to find the same spot within every 10s, then
2592 every 1s and finally every 250ms step. Otherwise, we pick the
2593 last possible time to wake up.
2594 */
2595
2596 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2597 if (c >= b) {
2598 if (_unlikely_(c < USEC_PER_MINUTE))
2599 return b;
2600
2601 c -= USEC_PER_MINUTE;
2602 }
2603
2604 if (c >= a)
2605 return c;
2606
2607 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2608 if (c >= b) {
2609 if (_unlikely_(c < USEC_PER_SEC*10))
2610 return b;
2611
2612 c -= USEC_PER_SEC*10;
2613 }
2614
2615 if (c >= a)
2616 return c;
2617
2618 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2619 if (c >= b) {
2620 if (_unlikely_(c < USEC_PER_SEC))
2621 return b;
2622
2623 c -= USEC_PER_SEC;
2624 }
2625
2626 if (c >= a)
2627 return c;
2628
2629 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2630 if (c >= b) {
2631 if (_unlikely_(c < USEC_PER_MSEC*250))
2632 return b;
2633
2634 c -= USEC_PER_MSEC*250;
2635 }
2636
2637 if (c >= a)
2638 return c;
2639
2640 return b;
2641 }
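
/* Worked example (editorial, not part of the original file): with e->perturb == 15 * USEC_PER_SEC
 * and a window of a == 100s, b == 130s (expressed in usec), the minute step computes
 * c = 120s + 15s = 135s >= b, minus one minute gives 75s < a, so that step fails; the 10s step
 * computes c = 130s + (15s % 10s) = 135s >= b, minus 10s gives 125s >= a, which is returned. All
 * loops sharing the same perturbation value hence wake up 5s past a 10s boundary, instead of each
 * picking its own spot within [a, b]. */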
2642
2643 static int event_arm_timer(
2644 sd_event *e,
2645 struct clock_data *d) {
2646
2647 struct itimerspec its = {};
2648 sd_event_source *a, *b;
2649 usec_t t;
2650 int r;
2651
2652 assert(e);
2653 assert(d);
2654
2655 if (!d->needs_rearm)
2656 return 0;
2657 else
2658 d->needs_rearm = false;
2659
2660 a = prioq_peek(d->earliest);
2661 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2662
2663 if (d->fd < 0)
2664 return 0;
2665
2666 if (d->next == USEC_INFINITY)
2667 return 0;
2668
2669 /* disarm */
2670 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2671 if (r < 0)
2672 return r;
2673
2674 d->next = USEC_INFINITY;
2675 return 0;
2676 }
2677
2678 b = prioq_peek(d->latest);
2679 assert_se(b && b->enabled != SD_EVENT_OFF);
2680
2681 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2682 if (d->next == t)
2683 return 0;
2684
2685 assert_se(d->fd >= 0);
2686
2687 if (t == 0) {
2688 /* We don't want to disarm here, we just mean some time looooong ago. */
2689 its.it_value.tv_sec = 0;
2690 its.it_value.tv_nsec = 1;
2691 } else
2692 timespec_store(&its.it_value, t);
2693
2694 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2695 if (r < 0)
2696 return -errno;
2697
2698 d->next = t;
2699 return 0;
2700 }
2701
2702 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2703 assert(e);
2704 assert(s);
2705 assert(s->type == SOURCE_IO);
2706
2707 /* If the event source was already pending, we just OR in the
2708 * new revents, otherwise we reset the value. The ORing is
2709 * necessary to handle EPOLLONESHOT events properly where
2710 * readability might happen independently of writability, and
2711 * we need to keep track of both */
2712
2713 if (s->pending)
2714 s->io.revents |= revents;
2715 else
2716 s->io.revents = revents;
2717
2718 return source_set_pending(s, true);
2719 }
2720
2721 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2722 uint64_t x;
2723 ssize_t ss;
2724
2725 assert(e);
2726 assert(fd >= 0);
2727
2728 assert_return(events == EPOLLIN, -EIO);
2729
2730 ss = read(fd, &x, sizeof(x));
2731 if (ss < 0) {
2732 if (IN_SET(errno, EAGAIN, EINTR))
2733 return 0;
2734
2735 return -errno;
2736 }
2737
2738 if (_unlikely_(ss != sizeof(x)))
2739 return -EIO;
2740
2741 if (next)
2742 *next = USEC_INFINITY;
2743
2744 return 0;
2745 }
2746
2747 static int process_timer(
2748 sd_event *e,
2749 usec_t n,
2750 struct clock_data *d) {
2751
2752 sd_event_source *s;
2753 int r;
2754
2755 assert(e);
2756 assert(d);
2757
2758 for (;;) {
2759 s = prioq_peek(d->earliest);
2760 if (!s ||
2761 s->time.next > n ||
2762 s->enabled == SD_EVENT_OFF ||
2763 s->pending)
2764 break;
2765
2766 r = source_set_pending(s, true);
2767 if (r < 0)
2768 return r;
2769
2770 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2771 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2772 d->needs_rearm = true;
2773 }
2774
2775 return 0;
2776 }
2777
2778 static int process_child(sd_event *e) {
2779 sd_event_source *s;
2780 Iterator i;
2781 int r;
2782
2783 assert(e);
2784
2785 e->need_process_child = false;
2786
2787 /*
2788 So, this is ugly. We iteratively invoke waitid() with
2789 P_PID + WNOHANG for each PID we wait for, instead of
2790 using P_ALL. This is because we only want to get child
2791 information for very specific child processes, and not
2792 all of them. We might not have processed the SIGCHLD
2793 event of a previous invocation and we don't want to
2794 maintain an unbounded *per-child* event queue, hence we
2795 really don't want anything flushed out of the kernel's
2796 queue that we don't care about. Since this is O(n), if
2797 you have a lot of processes you probably want to handle
2798 SIGCHLD yourself.
2799
2800 We do not reap the children here (by using WNOWAIT); this
2801 is only done after the event source is dispatched, so that
2802 the callback still sees the process as a zombie.
2803 */
2804
2805 HASHMAP_FOREACH(s, e->child_sources, i) {
2806 assert(s->type == SOURCE_CHILD);
2807
2808 if (s->pending)
2809 continue;
2810
2811 if (s->enabled == SD_EVENT_OFF)
2812 continue;
2813
2814 zero(s->child.siginfo);
2815 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2816 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2817 if (r < 0)
2818 return -errno;
2819
2820 if (s->child.siginfo.si_pid != 0) {
2821 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2822
2823 if (!zombie && (s->child.options & WEXITED)) {
2824 /* If the child isn't dead then let's
2825 * immediately remove the state change
2826 * from the queue, since there's no
2827 * benefit in leaving it queued */
2828
2829 assert(s->child.options & (WSTOPPED|WCONTINUED));
2830 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2831 }
2832
2833 r = source_set_pending(s, true);
2834 if (r < 0)
2835 return r;
2836 }
2837 }
2838
2839 return 0;
2840 }
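
/* Example (editorial sketch, not part of the original file): child sources are registered with
 * sd_event_add_child(). SIGCHLD is assumed to be blocked in the calling thread (delivery happens
 * via the SIGCHLD signalfd, see above), and child_pid and the callback name are made up:
 *
 *     static int on_child_exit(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("Child %i changed state, si_code=%i", si->si_pid, si->si_code);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, &s, child_pid, WEXITED, on_child_exit, NULL);
 */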
2841
2842 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2843 bool read_one = false;
2844 int r;
2845
2846 assert(e);
2847 assert(d);
2848 assert_return(events == EPOLLIN, -EIO);
2849
2850 /* If there's a signal queued on this priority and SIGCHLD is
2851 on this priority too, then make sure to recheck the
2852 children we watch. This is because we only ever dequeue
2853 the first signal per priority, and if we dequeue one, a
2854 SIGCHLD queued behind it would go unnoticed, even though
2855 we might have higher priority children we care about;
2856 hence we need to check for them explicitly. */
2857
2858 if (sigismember(&d->sigset, SIGCHLD))
2859 e->need_process_child = true;
2860
2861 /* If there's already an event source pending for this
2862 * priority we don't read another */
2863 if (d->current)
2864 return 0;
2865
2866 for (;;) {
2867 struct signalfd_siginfo si;
2868 ssize_t n;
2869 sd_event_source *s = NULL;
2870
2871 n = read(d->fd, &si, sizeof(si));
2872 if (n < 0) {
2873 if (IN_SET(errno, EAGAIN, EINTR))
2874 return read_one;
2875
2876 return -errno;
2877 }
2878
2879 if (_unlikely_(n != sizeof(si)))
2880 return -EIO;
2881
2882 assert(SIGNAL_VALID(si.ssi_signo));
2883
2884 read_one = true;
2885
2886 if (e->signal_sources)
2887 s = e->signal_sources[si.ssi_signo];
2888 if (!s)
2889 continue;
2890 if (s->pending)
2891 continue;
2892
2893 s->signal.siginfo = si;
2894 d->current = s;
2895
2896 r = source_set_pending(s, true);
2897 if (r < 0)
2898 return r;
2899
2900 return 1;
2901 }
2902 }
2903
2904 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2905 ssize_t n;
2906
2907 assert(e);
2908 assert(d);
2909
2910 assert_return(revents == EPOLLIN, -EIO);
2911
2912 /* If there's already an event source pending for this priority, don't read another */
2913 if (d->n_pending > 0)
2914 return 0;
2915
2916 /* Is the read buffer non-empty? If so, let's not read more */
2917 if (d->buffer_filled > 0)
2918 return 0;
2919
2920 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2921 if (n < 0) {
2922 if (IN_SET(errno, EAGAIN, EINTR))
2923 return 0;
2924
2925 return -errno;
2926 }
2927
2928 assert(n > 0);
2929 d->buffer_filled = (size_t) n;
2930 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2931
2932 return 1;
2933 }
2934
2935 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2936 assert(e);
2937 assert(d);
2938 assert(sz <= d->buffer_filled);
2939
2940 if (sz == 0)
2941 return;
2942
2943 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2944 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2945 d->buffer_filled -= sz;
2946
2947 if (d->buffer_filled == 0)
2948 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2949 }
2950
2951 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2952 int r;
2953
2954 assert(e);
2955 assert(d);
2956
2957 /* If there's already an event source pending for this priority, don't read another */
2958 if (d->n_pending > 0)
2959 return 0;
2960
2961 while (d->buffer_filled > 0) {
2962 size_t sz;
2963
2964 /* Let's validate that the event structures are complete */
2965 if (d->buffer_filled < offsetof(struct inotify_event, name))
2966 return -EIO;
2967
2968 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2969 if (d->buffer_filled < sz)
2970 return -EIO;
2971
2972 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2973 struct inode_data *inode_data;
2974 Iterator i;
2975
2976 /* The queue overran, let's pass this event to all event sources connected to this inotify
2977 * object */
2978
2979 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2980 sd_event_source *s;
2981
2982 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2983
2984 if (s->enabled == SD_EVENT_OFF)
2985 continue;
2986
2987 r = source_set_pending(s, true);
2988 if (r < 0)
2989 return r;
2990 }
2991 }
2992 } else {
2993 struct inode_data *inode_data;
2994 sd_event_source *s;
2995
2996 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2997 * our watch descriptor table. */
2998 if (d->buffer.ev.mask & IN_IGNORED) {
2999
3000 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3001 if (!inode_data) {
3002 event_inotify_data_drop(e, d, sz);
3003 continue;
3004 }
3005
3006 /* The watch descriptor was removed by the kernel, let's drop it here too */
3007 inode_data->wd = -1;
3008 } else {
3009 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3010 if (!inode_data) {
3011 event_inotify_data_drop(e, d, sz);
3012 continue;
3013 }
3014 }
3015
3016 /* Trigger all event sources that are interested in these events. Also trigger all event
3017 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3018 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3019
3020 if (s->enabled == SD_EVENT_OFF)
3021 continue;
3022
3023 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3024 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3025 continue;
3026
3027 r = source_set_pending(s, true);
3028 if (r < 0)
3029 return r;
3030 }
3031 }
3032
3033 /* Something pending now? If so, let's finish, otherwise let's read more. */
3034 if (d->n_pending > 0)
3035 return 1;
3036 }
3037
3038 return 0;
3039 }
3040
3041 static int process_inotify(sd_event *e) {
3042 struct inotify_data *d;
3043 int r, done = 0;
3044
3045 assert(e);
3046
3047 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3048 r = event_inotify_data_process(e, d);
3049 if (r < 0)
3050 return r;
3051 if (r > 0)
3052 done++;
3053 }
3054
3055 return done;
3056 }
3057
3058 static int source_dispatch(sd_event_source *s) {
3059 EventSourceType saved_type;
3060 int r = 0;
3061
3062 assert(s);
3063 assert(s->pending || s->type == SOURCE_EXIT);
3064
3065 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3066 * the event. */
3067 saved_type = s->type;
3068
3069 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3070 r = source_set_pending(s, false);
3071 if (r < 0)
3072 return r;
3073 }
3074
3075 if (s->type != SOURCE_POST) {
3076 sd_event_source *z;
3077 Iterator i;
3078
3079 /* If we execute a non-post source, let's mark all
3080 * post sources as pending */
3081
3082 SET_FOREACH(z, s->event->post_sources, i) {
3083 if (z->enabled == SD_EVENT_OFF)
3084 continue;
3085
3086 r = source_set_pending(z, true);
3087 if (r < 0)
3088 return r;
3089 }
3090 }
3091
3092 if (s->enabled == SD_EVENT_ONESHOT) {
3093 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3094 if (r < 0)
3095 return r;
3096 }
3097
3098 s->dispatching = true;
3099
3100 switch (s->type) {
3101
3102 case SOURCE_IO:
3103 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3104 break;
3105
3106 case SOURCE_TIME_REALTIME:
3107 case SOURCE_TIME_BOOTTIME:
3108 case SOURCE_TIME_MONOTONIC:
3109 case SOURCE_TIME_REALTIME_ALARM:
3110 case SOURCE_TIME_BOOTTIME_ALARM:
3111 r = s->time.callback(s, s->time.next, s->userdata);
3112 break;
3113
3114 case SOURCE_SIGNAL:
3115 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3116 break;
3117
3118 case SOURCE_CHILD: {
3119 bool zombie;
3120
3121 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3122
3123 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3124
3125 /* Now, reap the PID for good. */
3126 if (zombie)
3127 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3128
3129 break;
3130 }
3131
3132 case SOURCE_DEFER:
3133 r = s->defer.callback(s, s->userdata);
3134 break;
3135
3136 case SOURCE_POST:
3137 r = s->post.callback(s, s->userdata);
3138 break;
3139
3140 case SOURCE_EXIT:
3141 r = s->exit.callback(s, s->userdata);
3142 break;
3143
3144 case SOURCE_INOTIFY: {
3145 struct sd_event *e = s->event;
3146 struct inotify_data *d;
3147 size_t sz;
3148
3149 assert(s->inotify.inode_data);
3150 assert_se(d = s->inotify.inode_data->inotify_data);
3151
3152 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3153 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3154 assert(d->buffer_filled >= sz);
3155
3156 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3157
3158 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3159 * buffer. */
3160 if (d->n_pending == 0)
3161 event_inotify_data_drop(e, d, sz);
3162
3163 break;
3164 }
3165
3166 case SOURCE_WATCHDOG:
3167 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3168 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3169 assert_not_reached("Wut? I shouldn't exist.");
3170 }
3171
3172 s->dispatching = false;
3173
3174 if (r < 0)
3175 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3176 strna(s->description), event_source_type_to_string(saved_type));
3177
3178 if (s->n_ref == 0)
3179 source_free(s);
3180 else if (r < 0)
3181 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3182
3183 return 1;
3184 }
3185
3186 static int event_prepare(sd_event *e) {
3187 int r;
3188
3189 assert(e);
3190
3191 for (;;) {
3192 sd_event_source *s;
3193
3194 s = prioq_peek(e->prepare);
3195 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3196 break;
3197
3198 s->prepare_iteration = e->iteration;
3199 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3200 if (r < 0)
3201 return r;
3202
3203 assert(s->prepare);
3204
3205 s->dispatching = true;
3206 r = s->prepare(s, s->userdata);
3207 s->dispatching = false;
3208
3209 if (r < 0)
3210 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3211 strna(s->description), event_source_type_to_string(s->type));
3212
3213 if (s->n_ref == 0)
3214 source_free(s);
3215 else if (r < 0)
3216 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3217 }
3218
3219 return 0;
3220 }
3221
3222 static int dispatch_exit(sd_event *e) {
3223 sd_event_source *p;
3224 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3225 int r;
3226
3227 assert(e);
3228
3229 p = prioq_peek(e->exit);
3230 if (!p || p->enabled == SD_EVENT_OFF) {
3231 e->state = SD_EVENT_FINISHED;
3232 return 0;
3233 }
3234
3235 ref = sd_event_ref(e);
3236 e->iteration++;
3237 e->state = SD_EVENT_EXITING;
3238 r = source_dispatch(p);
3239 e->state = SD_EVENT_INITIAL;
3240 return r;
3241 }
3242
3243 static sd_event_source* event_next_pending(sd_event *e) {
3244 sd_event_source *p;
3245
3246 assert(e);
3247
3248 p = prioq_peek(e->pending);
3249 if (!p)
3250 return NULL;
3251
3252 if (p->enabled == SD_EVENT_OFF)
3253 return NULL;
3254
3255 return p;
3256 }
3257
3258 static int arm_watchdog(sd_event *e) {
3259 struct itimerspec its = {};
3260 usec_t t;
3261 int r;
3262
3263 assert(e);
3264 assert(e->watchdog_fd >= 0);
3265
3266 t = sleep_between(e,
3267 e->watchdog_last + (e->watchdog_period / 2),
3268 e->watchdog_last + (e->watchdog_period * 3 / 4));
3269
3270 timespec_store(&its.it_value, t);
3271
3272 /* Make sure we never set the watchdog to 0, which tells the
3273 * kernel to disable it. */
3274 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3275 its.it_value.tv_nsec = 1;
3276
3277 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3278 if (r < 0)
3279 return -errno;
3280
3281 return 0;
3282 }
3283
3284 static int process_watchdog(sd_event *e) {
3285 assert(e);
3286
3287 if (!e->watchdog)
3288 return 0;
3289
3290 /* Don't notify watchdog too often */
3291 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3292 return 0;
3293
3294 sd_notify(false, "WATCHDOG=1");
3295 e->watchdog_last = e->timestamp.monotonic;
3296
3297 return arm_watchdog(e);
3298 }
3299
3300 static void event_close_inode_data_fds(sd_event *e) {
3301 struct inode_data *d;
3302
3303 assert(e);
3304
3305 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3306 * filesystems. But we can't close them right away, as we need them as long as the user still wants to make
3307 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
3308 * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3309 * compromise. */
3310
3311 while ((d = e->inode_data_to_close)) {
3312 assert(d->fd >= 0);
3313 d->fd = safe_close(d->fd);
3314
3315 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3316 }
3317 }
3318
3319 _public_ int sd_event_prepare(sd_event *e) {
3320 int r;
3321
3322 assert_return(e, -EINVAL);
3323 assert_return(e = event_resolve(e), -ENOPKG);
3324 assert_return(!event_pid_changed(e), -ECHILD);
3325 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3326 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3327
3328 if (e->exit_requested)
3329 goto pending;
3330
3331 e->iteration++;
3332
3333 e->state = SD_EVENT_PREPARING;
3334 r = event_prepare(e);
3335 e->state = SD_EVENT_INITIAL;
3336 if (r < 0)
3337 return r;
3338
3339 r = event_arm_timer(e, &e->realtime);
3340 if (r < 0)
3341 return r;
3342
3343 r = event_arm_timer(e, &e->boottime);
3344 if (r < 0)
3345 return r;
3346
3347 r = event_arm_timer(e, &e->monotonic);
3348 if (r < 0)
3349 return r;
3350
3351 r = event_arm_timer(e, &e->realtime_alarm);
3352 if (r < 0)
3353 return r;
3354
3355 r = event_arm_timer(e, &e->boottime_alarm);
3356 if (r < 0)
3357 return r;
3358
3359 event_close_inode_data_fds(e);
3360
3361 if (event_next_pending(e) || e->need_process_child)
3362 goto pending;
3363
3364 e->state = SD_EVENT_ARMED;
3365
3366 return 0;
3367
3368 pending:
3369 e->state = SD_EVENT_ARMED;
3370 r = sd_event_wait(e, 0);
3371 if (r == 0)
3372 e->state = SD_EVENT_ARMED;
3373
3374 return r;
3375 }
3376
3377 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3378 struct epoll_event *ev_queue;
3379 unsigned ev_queue_max;
3380 int r, m, i;
3381
3382 assert_return(e, -EINVAL);
3383 assert_return(e = event_resolve(e), -ENOPKG);
3384 assert_return(!event_pid_changed(e), -ECHILD);
3385 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3386 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3387
3388 if (e->exit_requested) {
3389 e->state = SD_EVENT_PENDING;
3390 return 1;
3391 }
3392
3393 ev_queue_max = MAX(e->n_sources, 1u);
3394 ev_queue = newa(struct epoll_event, ev_queue_max);
3395
3396 /* If we still have inotify data buffered, then query the other fds, but don't block waiting on them */
3397 if (e->inotify_data_buffered)
3398 timeout = 0;
3399
3400 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3401 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3402 if (m < 0) {
3403 if (errno == EINTR) {
3404 e->state = SD_EVENT_PENDING;
3405 return 1;
3406 }
3407
3408 r = -errno;
3409 goto finish;
3410 }
3411
3412 triple_timestamp_get(&e->timestamp);
3413
3414 for (i = 0; i < m; i++) {
3415
3416 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3417 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3418 else {
3419 WakeupType *t = ev_queue[i].data.ptr;
3420
3421 switch (*t) {
3422
3423 case WAKEUP_EVENT_SOURCE:
3424 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3425 break;
3426
3427 case WAKEUP_CLOCK_DATA: {
3428 struct clock_data *d = ev_queue[i].data.ptr;
3429 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3430 break;
3431 }
3432
3433 case WAKEUP_SIGNAL_DATA:
3434 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3435 break;
3436
3437 case WAKEUP_INOTIFY_DATA:
3438 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3439 break;
3440
3441 default:
3442 assert_not_reached("Invalid wake-up pointer");
3443 }
3444 }
3445 if (r < 0)
3446 goto finish;
3447 }
3448
3449 r = process_watchdog(e);
3450 if (r < 0)
3451 goto finish;
3452
3453 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3454 if (r < 0)
3455 goto finish;
3456
3457 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3458 if (r < 0)
3459 goto finish;
3460
3461 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3462 if (r < 0)
3463 goto finish;
3464
3465 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3466 if (r < 0)
3467 goto finish;
3468
3469 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3470 if (r < 0)
3471 goto finish;
3472
3473 if (e->need_process_child) {
3474 r = process_child(e);
3475 if (r < 0)
3476 goto finish;
3477 }
3478
3479 r = process_inotify(e);
3480 if (r < 0)
3481 goto finish;
3482
3483 if (event_next_pending(e)) {
3484 e->state = SD_EVENT_PENDING;
3485
3486 return 1;
3487 }
3488
3489 r = 0;
3490
3491 finish:
3492 e->state = SD_EVENT_INITIAL;
3493
3494 return r;
3495 }
3496
3497 _public_ int sd_event_dispatch(sd_event *e) {
3498 sd_event_source *p;
3499 int r;
3500
3501 assert_return(e, -EINVAL);
3502 assert_return(e = event_resolve(e), -ENOPKG);
3503 assert_return(!event_pid_changed(e), -ECHILD);
3504 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3505 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3506
3507 if (e->exit_requested)
3508 return dispatch_exit(e);
3509
3510 p = event_next_pending(e);
3511 if (p) {
3512 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3513
3514 ref = sd_event_ref(e);
3515 e->state = SD_EVENT_RUNNING;
3516 r = source_dispatch(p);
3517 e->state = SD_EVENT_INITIAL;
3518 return r;
3519 }
3520
3521 e->state = SD_EVENT_INITIAL;
3522
3523 return 1;
3524 }
3525
3526 static void event_log_delays(sd_event *e) {
3527 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3528 unsigned i;
3529 int o;
3530
3531 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3532 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3533 e->delays[i] = 0;
3534 }
3535 log_debug("Event loop iterations: %.*s", o, b);
3536 }
3537
3538 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3539 int r;
3540
3541 assert_return(e, -EINVAL);
3542 assert_return(e = event_resolve(e), -ENOPKG);
3543 assert_return(!event_pid_changed(e), -ECHILD);
3544 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3545 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3546
3547 if (e->profile_delays && e->last_run) {
3548 usec_t this_run;
3549 unsigned l;
3550
3551 this_run = now(CLOCK_MONOTONIC);
3552
3553 l = u64log2(this_run - e->last_run);
3554 assert(l < sizeof(e->delays));
3555 e->delays[l]++;
3556
3557 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3558 event_log_delays(e);
3559 e->last_log = this_run;
3560 }
3561 }
3562
3563 r = sd_event_prepare(e);
3564 if (r == 0)
3565 /* There was nothing? Then wait... */
3566 r = sd_event_wait(e, timeout);
3567
3568 if (e->profile_delays)
3569 e->last_run = now(CLOCK_MONOTONIC);
3570
3571 if (r > 0) {
3572 /* There's something now, then let's dispatch it */
3573 r = sd_event_dispatch(e);
3574 if (r < 0)
3575 return r;
3576
3577 return 1;
3578 }
3579
3580 return r;
3581 }
3582
3583 _public_ int sd_event_loop(sd_event *e) {
3584 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3585 int r;
3586
3587 assert_return(e, -EINVAL);
3588 assert_return(e = event_resolve(e), -ENOPKG);
3589 assert_return(!event_pid_changed(e), -ECHILD);
3590 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3591
3592 ref = sd_event_ref(e);
3593
3594 while (e->state != SD_EVENT_FINISHED) {
3595 r = sd_event_run(e, (uint64_t) -1);
3596 if (r < 0)
3597 return r;
3598 }
3599
3600 return e->exit_code;
3601 }
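
/* Example (editorial sketch, not part of the original file): a minimal consumer acquires the
 * default loop, attaches a SIGTERM source (the signal must be blocked first, since it is consumed
 * via signalfd) and hands control to sd_event_loop(). Error handling is omitted for brevity:
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t mask;
 *     sd_event *e = NULL;
 *
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     sd_event_default(&e);
 *     sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *     r = sd_event_loop(e);
 */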
3602
3603 _public_ int sd_event_get_fd(sd_event *e) {
3604
3605 assert_return(e, -EINVAL);
3606 assert_return(e = event_resolve(e), -ENOPKG);
3607 assert_return(!event_pid_changed(e), -ECHILD);
3608
3609 return e->epoll_fd;
3610 }
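
/* Example (editorial sketch, not part of the original file): the returned fd can be watched from a
 * foreign event loop (here plain poll(2)); once it polls readable, a single iteration is driven
 * manually, mirroring what sd_event_run() does:
 *
 *     struct pollfd p = { .fd = sd_event_get_fd(e), .events = POLLIN };
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0) {
 *             (void) poll(&p, 1, -1);       // block in the foreign loop
 *             r = sd_event_wait(e, 0);      // collect events without blocking again
 *     }
 *     if (r > 0)
 *             r = sd_event_dispatch(e);
 */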
3611
3612 _public_ int sd_event_get_state(sd_event *e) {
3613 assert_return(e, -EINVAL);
3614 assert_return(e = event_resolve(e), -ENOPKG);
3615 assert_return(!event_pid_changed(e), -ECHILD);
3616
3617 return e->state;
3618 }
3619
3620 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3621 assert_return(e, -EINVAL);
3622 assert_return(e = event_resolve(e), -ENOPKG);
3623 assert_return(code, -EINVAL);
3624 assert_return(!event_pid_changed(e), -ECHILD);
3625
3626 if (!e->exit_requested)
3627 return -ENODATA;
3628
3629 *code = e->exit_code;
3630 return 0;
3631 }
3632
3633 _public_ int sd_event_exit(sd_event *e, int code) {
3634 assert_return(e, -EINVAL);
3635 assert_return(e = event_resolve(e), -ENOPKG);
3636 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3637 assert_return(!event_pid_changed(e), -ECHILD);
3638
3639 e->exit_requested = true;
3640 e->exit_code = code;
3641
3642 return 0;
3643 }
3644
3645 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3646 assert_return(e, -EINVAL);
3647 assert_return(e = event_resolve(e), -ENOPKG);
3648 assert_return(usec, -EINVAL);
3649 assert_return(!event_pid_changed(e), -ECHILD);
3650
3651 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3652 return -EOPNOTSUPP;
3653
3654 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
3655 * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
3656 * but for the purpose of getting the time this doesn't matter. */
3657 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3658 return -EOPNOTSUPP;
3659
3660 if (!triple_timestamp_is_set(&e->timestamp)) {
3661 /* Implicitly fall back to now() if we never ran
3662 * before and thus have no cached time. */
3663 *usec = now(clock);
3664 return 1;
3665 }
3666
3667 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3668 return 0;
3669 }
3670
3671 _public_ int sd_event_default(sd_event **ret) {
3672 sd_event *e = NULL;
3673 int r;
3674
3675 if (!ret)
3676 return !!default_event;
3677
3678 if (default_event) {
3679 *ret = sd_event_ref(default_event);
3680 return 0;
3681 }
3682
3683 r = sd_event_new(&e);
3684 if (r < 0)
3685 return r;
3686
3687 e->default_event_ptr = &default_event;
3688 e->tid = gettid();
3689 default_event = e;
3690
3691 *ret = e;
3692 return 1;
3693 }
3694
3695 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3696 assert_return(e, -EINVAL);
3697 assert_return(e = event_resolve(e), -ENOPKG);
3698 assert_return(tid, -EINVAL);
3699 assert_return(!event_pid_changed(e), -ECHILD);
3700
3701 if (e->tid != 0) {
3702 *tid = e->tid;
3703 return 0;
3704 }
3705
3706 return -ENXIO;
3707 }
3708
3709 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3710 int r;
3711
3712 assert_return(e, -EINVAL);
3713 assert_return(e = event_resolve(e), -ENOPKG);
3714 assert_return(!event_pid_changed(e), -ECHILD);
3715
3716 if (e->watchdog == !!b)
3717 return e->watchdog;
3718
3719 if (b) {
3720 struct epoll_event ev;
3721
3722 r = sd_watchdog_enabled(false, &e->watchdog_period);
3723 if (r <= 0)
3724 return r;
3725
3726 /* Issue first ping immediately */
3727 sd_notify(false, "WATCHDOG=1");
3728 e->watchdog_last = now(CLOCK_MONOTONIC);
3729
3730 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3731 if (e->watchdog_fd < 0)
3732 return -errno;
3733
3734 r = arm_watchdog(e);
3735 if (r < 0)
3736 goto fail;
3737
3738 ev = (struct epoll_event) {
3739 .events = EPOLLIN,
3740 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3741 };
3742
3743 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3744 if (r < 0) {
3745 r = -errno;
3746 goto fail;
3747 }
3748
3749 } else {
3750 if (e->watchdog_fd >= 0) {
3751 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3752 e->watchdog_fd = safe_close(e->watchdog_fd);
3753 }
3754 }
3755
3756 e->watchdog = !!b;
3757 return e->watchdog;
3758
3759 fail:
3760 e->watchdog_fd = safe_close(e->watchdog_fd);
3761 return r;
3762 }
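
/* Example (editorial sketch, not part of the original file): a service started with WatchdogSec=
 * set in its unit file only needs to opt in once; the loop then sends the "WATCHDOG=1"
 * notifications on its own, paced between one half and three quarters of the period handed to us
 * via sd_watchdog_enabled() (see arm_watchdog() and process_watchdog() above):
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return r;
 */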
3763
3764 _public_ int sd_event_get_watchdog(sd_event *e) {
3765 assert_return(e, -EINVAL);
3766 assert_return(e = event_resolve(e), -ENOPKG);
3767 assert_return(!event_pid_changed(e), -ECHILD);
3768
3769 return e->watchdog;
3770 }
3771
3772 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3773 assert_return(e, -EINVAL);
3774 assert_return(e = event_resolve(e), -ENOPKG);
3775 assert_return(!event_pid_changed(e), -ECHILD);
3776
3777 *ret = e->iteration;
3778 return 0;
3779 }
3780
3781 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3782 assert_return(s, -EINVAL);
3783
3784 s->destroy_callback = callback;
3785 return 0;
3786 }
3787
3788 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3789 assert_return(s, -EINVAL);
3790
3791 if (ret)
3792 *ret = s->destroy_callback;
3793
3794 return !!s->destroy_callback;
3795 }