1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 Copyright 2013 Lennart Poettering
4 ***/
5
6 #include <sys/epoll.h>
7 #include <sys/timerfd.h>
8 #include <sys/wait.h>
9
10 #include "sd-daemon.h"
11 #include "sd-event.h"
12 #include "sd-id128.h"
13
14 #include "alloc-util.h"
15 #include "fd-util.h"
16 #include "fs-util.h"
17 #include "hashmap.h"
18 #include "list.h"
19 #include "macro.h"
20 #include "missing.h"
21 #include "prioq.h"
22 #include "process-util.h"
23 #include "set.h"
24 #include "signal-util.h"
25 #include "string-table.h"
26 #include "string-util.h"
27 #include "time-util.h"
28 #include "util.h"
29
30 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
31
32 typedef enum EventSourceType {
33 SOURCE_IO,
34 SOURCE_TIME_REALTIME,
35 SOURCE_TIME_BOOTTIME,
36 SOURCE_TIME_MONOTONIC,
37 SOURCE_TIME_REALTIME_ALARM,
38 SOURCE_TIME_BOOTTIME_ALARM,
39 SOURCE_SIGNAL,
40 SOURCE_CHILD,
41 SOURCE_DEFER,
42 SOURCE_POST,
43 SOURCE_EXIT,
44 SOURCE_WATCHDOG,
45 SOURCE_INOTIFY,
46 _SOURCE_EVENT_SOURCE_TYPE_MAX,
47 _SOURCE_EVENT_SOURCE_TYPE_INVALID = -1
48 } EventSourceType;
49
50 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
51 [SOURCE_IO] = "io",
52 [SOURCE_TIME_REALTIME] = "realtime",
53 [SOURCE_TIME_BOOTTIME] = "boottime",
54 [SOURCE_TIME_MONOTONIC] = "monotonic",
55 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
56 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
57 [SOURCE_SIGNAL] = "signal",
58 [SOURCE_CHILD] = "child",
59 [SOURCE_DEFER] = "defer",
60 [SOURCE_POST] = "post",
61 [SOURCE_EXIT] = "exit",
62 [SOURCE_WATCHDOG] = "watchdog",
63 [SOURCE_INOTIFY] = "inotify",
64 };
65
66 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
67
68 /* All objects we use in epoll events start with a field of this
69 * type, so that we know how to dispatch them */
70 typedef enum WakeupType {
71 WAKEUP_NONE,
72 WAKEUP_EVENT_SOURCE,
73 WAKEUP_CLOCK_DATA,
74 WAKEUP_SIGNAL_DATA,
75 WAKEUP_INOTIFY_DATA,
76 _WAKEUP_TYPE_MAX,
77 _WAKEUP_TYPE_INVALID = -1,
78 } WakeupType;
79
80 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
81
82 struct inode_data;
83
84 struct sd_event_source {
85 WakeupType wakeup;
86
87 unsigned n_ref;
88
89 sd_event *event;
90 void *userdata;
91 sd_event_handler_t prepare;
92
93 char *description;
94
95 EventSourceType type:5;
96 int enabled:3;
97 bool pending:1;
98 bool dispatching:1;
99 bool floating:1;
100
101 int64_t priority;
102 unsigned pending_index;
103 unsigned prepare_index;
104 uint64_t pending_iteration;
105 uint64_t prepare_iteration;
106
107 sd_event_destroy_t destroy_callback;
108
109 LIST_FIELDS(sd_event_source, sources);
110
111 union {
112 struct {
113 sd_event_io_handler_t callback;
114 int fd;
115 uint32_t events;
116 uint32_t revents;
117 bool registered:1;
118 bool owned:1;
119 } io;
120 struct {
121 sd_event_time_handler_t callback;
122 usec_t next, accuracy;
123 unsigned earliest_index;
124 unsigned latest_index;
125 } time;
126 struct {
127 sd_event_signal_handler_t callback;
128 struct signalfd_siginfo siginfo;
129 int sig;
130 } signal;
131 struct {
132 sd_event_child_handler_t callback;
133 siginfo_t siginfo;
134 pid_t pid;
135 int options;
136 } child;
137 struct {
138 sd_event_handler_t callback;
139 } defer;
140 struct {
141 sd_event_handler_t callback;
142 } post;
143 struct {
144 sd_event_handler_t callback;
145 unsigned prioq_index;
146 } exit;
147 struct {
148 sd_event_inotify_handler_t callback;
149 uint32_t mask;
150 struct inode_data *inode_data;
151 LIST_FIELDS(sd_event_source, by_inode_data);
152 } inotify;
153 };
154 };
155
156 struct clock_data {
157 WakeupType wakeup;
158 int fd;
159
160 /* For all clocks we maintain two priority queues each, one
161 * ordered by the earliest times the events may be
162 * dispatched, and one ordered by the latest times they must
163 * have been dispatched. The range between the top entries in
164 * the two prioqs is the time window we can freely schedule
165 * wakeups in */
166
167 Prioq *earliest;
168 Prioq *latest;
169 usec_t next;
170
171 bool needs_rearm:1;
172 };
173
174 struct signal_data {
175 WakeupType wakeup;
176
177 /* For each priority we maintain one signal fd, so that we
178 * only have to dequeue a single event per priority at a
179 * time. */
180
181 int fd;
182 int64_t priority;
183 sigset_t sigset;
184 sd_event_source *current;
185 };
186
187 /* A structure listing all event sources currently watching a specific inode */
188 struct inode_data {
189 /* The identifier for the inode, the combination of the .st_dev + .st_ino fields of the file */
190 ino_t ino;
191 dev_t dev;
192
193 /* An fd of the inode to watch. The fd is kept open until the next iteration of the loop, so that the
194 * priority can still be changed until then: changing the priority means adding a watch descriptor to the
195 * inotify fd of the new priority, which is only possible while we still have a handle to the original
196 * inode. We keep a list of all inode_data objects with an open fd in the to_close list (see below) of
197 * the sd-event object, so that it is efficient to close them all before entering the next event loop
198 * iteration. */
199 int fd;
200
201 /* The inotify "watch descriptor" */
202 int wd;
203
204 /* The combination of the mask of all inotify watches on this inode we manage. This is also the mask that has
205 * most recently been set on the watch descriptor. */
206 uint32_t combined_mask;
207
208 /* All event sources subscribed to this inode */
209 LIST_HEAD(sd_event_source, event_sources);
210
211 /* The inotify object we watch this inode with */
212 struct inotify_data *inotify_data;
213
214 /* A linked list of all inode data objects with fds to close (see above) */
215 LIST_FIELDS(struct inode_data, to_close);
216 };
217
218 /* A structure encapsulating an inotify fd */
219 struct inotify_data {
220 WakeupType wakeup;
221
222 /* For each priority we maintain one inotify fd, so that we only have to dequeue a single event per priority at
223 * a time */
224
225 int fd;
226 int64_t priority;
227
228 Hashmap *inodes; /* The inode_data structures keyed by dev+ino */
229 Hashmap *wd; /* The inode_data structures keyed by the watch descriptor for each */
230
231 /* The buffer we read inotify events into */
232 union inotify_event_buffer buffer;
233 size_t buffer_filled; /* fill level of the buffer */
234
235 /* How many event sources are currently marked pending for this inotify. We won't read new events off the
236 * inotify fd as long as there are still pending events on the inotify (because we have no strategy for queuing
237 * the events locally if they can't be coalesced). */
238 unsigned n_pending;
239
240 /* A linked list of all inotify objects with data already read, that still need processing. We keep this list
241 * to make it efficient to figure out what inotify objects to process data on next. */
242 LIST_FIELDS(struct inotify_data, buffered);
243 };
244
245 struct sd_event {
246 unsigned n_ref;
247
248 int epoll_fd;
249 int watchdog_fd;
250
251 Prioq *pending;
252 Prioq *prepare;
253
254 /* timerfd_create() only supports these five clocks so far. We
255 * can add support for more clocks when the kernel learns to
256 * deal with them, too. */
257 struct clock_data realtime;
258 struct clock_data boottime;
259 struct clock_data monotonic;
260 struct clock_data realtime_alarm;
261 struct clock_data boottime_alarm;
262
263 usec_t perturb;
264
265 sd_event_source **signal_sources; /* indexed by signal number */
266 Hashmap *signal_data; /* indexed by priority */
267
268 Hashmap *child_sources;
269 unsigned n_enabled_child_sources;
270
271 Set *post_sources;
272
273 Prioq *exit;
274
275 Hashmap *inotify_data; /* indexed by priority */
276
277 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
278 LIST_HEAD(struct inode_data, inode_data_to_close);
279
280 /* A list of inotify objects that already have events buffered which aren't processed yet */
281 LIST_HEAD(struct inotify_data, inotify_data_buffered);
282
283 pid_t original_pid;
284
285 uint64_t iteration;
286 triple_timestamp timestamp;
287 int state;
288
289 bool exit_requested:1;
290 bool need_process_child:1;
291 bool watchdog:1;
292 bool profile_delays:1;
293
294 int exit_code;
295
296 pid_t tid;
297 sd_event **default_event_ptr;
298
299 usec_t watchdog_last, watchdog_period;
300
301 unsigned n_sources;
302
303 LIST_HEAD(sd_event_source, sources);
304
305 usec_t last_run, last_log;
306 unsigned delays[sizeof(usec_t) * 8];
307 };
308
309 static thread_local sd_event *default_event = NULL;
310
311 static void source_disconnect(sd_event_source *s);
312 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
313
314 static sd_event *event_resolve(sd_event *e) {
315 return e == SD_EVENT_DEFAULT ? default_event : e;
316 }
317
318 static int pending_prioq_compare(const void *a, const void *b) {
319 const sd_event_source *x = a, *y = b;
320
321 assert(x->pending);
322 assert(y->pending);
323
324 /* Enabled ones first */
325 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
326 return -1;
327 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
328 return 1;
329
330 /* Lower priority values first */
331 if (x->priority < y->priority)
332 return -1;
333 if (x->priority > y->priority)
334 return 1;
335
336 /* Older entries first */
337 if (x->pending_iteration < y->pending_iteration)
338 return -1;
339 if (x->pending_iteration > y->pending_iteration)
340 return 1;
341
342 return 0;
343 }
344
345 static int prepare_prioq_compare(const void *a, const void *b) {
346 const sd_event_source *x = a, *y = b;
347
348 assert(x->prepare);
349 assert(y->prepare);
350
351 /* Enabled ones first */
352 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
353 return -1;
354 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
355 return 1;
356
357 /* Move most recently prepared ones last, so that we can stop
358 * preparing as soon as we hit one that has already been
359 * prepared in the current iteration */
360 if (x->prepare_iteration < y->prepare_iteration)
361 return -1;
362 if (x->prepare_iteration > y->prepare_iteration)
363 return 1;
364
365 /* Lower priority values first */
366 if (x->priority < y->priority)
367 return -1;
368 if (x->priority > y->priority)
369 return 1;
370
371 return 0;
372 }
373
374 static int earliest_time_prioq_compare(const void *a, const void *b) {
375 const sd_event_source *x = a, *y = b;
376
377 assert(EVENT_SOURCE_IS_TIME(x->type));
378 assert(x->type == y->type);
379
380 /* Enabled ones first */
381 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
382 return -1;
383 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
384 return 1;
385
386 /* Move the pending ones to the end */
387 if (!x->pending && y->pending)
388 return -1;
389 if (x->pending && !y->pending)
390 return 1;
391
392 /* Order by time */
393 if (x->time.next < y->time.next)
394 return -1;
395 if (x->time.next > y->time.next)
396 return 1;
397
398 return 0;
399 }
400
401 static usec_t time_event_source_latest(const sd_event_source *s) {
402 return usec_add(s->time.next, s->time.accuracy);
403 }
404
405 static int latest_time_prioq_compare(const void *a, const void *b) {
406 const sd_event_source *x = a, *y = b;
407
408 assert(EVENT_SOURCE_IS_TIME(x->type));
409 assert(x->type == y->type);
410
411 /* Enabled ones first */
412 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
413 return -1;
414 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
415 return 1;
416
417 /* Move the pending ones to the end */
418 if (!x->pending && y->pending)
419 return -1;
420 if (x->pending && !y->pending)
421 return 1;
422
423 /* Order by time */
424 if (time_event_source_latest(x) < time_event_source_latest(y))
425 return -1;
426 if (time_event_source_latest(x) > time_event_source_latest(y))
427 return 1;
428
429 return 0;
430 }
431
432 static int exit_prioq_compare(const void *a, const void *b) {
433 const sd_event_source *x = a, *y = b;
434
435 assert(x->type == SOURCE_EXIT);
436 assert(y->type == SOURCE_EXIT);
437
438 /* Enabled ones first */
439 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
440 return -1;
441 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
442 return 1;
443
444 /* Lower priority values first */
445 if (x->priority < y->priority)
446 return -1;
447 if (x->priority > y->priority)
448 return 1;
449
450 return 0;
451 }
452
453 static void free_clock_data(struct clock_data *d) {
454 assert(d);
455 assert(d->wakeup == WAKEUP_CLOCK_DATA);
456
457 safe_close(d->fd);
458 prioq_free(d->earliest);
459 prioq_free(d->latest);
460 }
461
462 static void event_free(sd_event *e) {
463 sd_event_source *s;
464
465 assert(e);
466
467 while ((s = e->sources)) {
468 assert(s->floating);
469 source_disconnect(s);
470 sd_event_source_unref(s);
471 }
472
473 assert(e->n_sources == 0);
474
475 if (e->default_event_ptr)
476 *(e->default_event_ptr) = NULL;
477
478 safe_close(e->epoll_fd);
479 safe_close(e->watchdog_fd);
480
481 free_clock_data(&e->realtime);
482 free_clock_data(&e->boottime);
483 free_clock_data(&e->monotonic);
484 free_clock_data(&e->realtime_alarm);
485 free_clock_data(&e->boottime_alarm);
486
487 prioq_free(e->pending);
488 prioq_free(e->prepare);
489 prioq_free(e->exit);
490
491 free(e->signal_sources);
492 hashmap_free(e->signal_data);
493
494 hashmap_free(e->inotify_data);
495
496 hashmap_free(e->child_sources);
497 set_free(e->post_sources);
498 free(e);
499 }
500
501 _public_ int sd_event_new(sd_event** ret) {
502 sd_event *e;
503 int r;
504
505 assert_return(ret, -EINVAL);
506
507 e = new(sd_event, 1);
508 if (!e)
509 return -ENOMEM;
510
511 *e = (sd_event) {
512 .n_ref = 1,
513 .epoll_fd = -1,
514 .watchdog_fd = -1,
515 .realtime.wakeup = WAKEUP_CLOCK_DATA,
516 .realtime.fd = -1,
517 .realtime.next = USEC_INFINITY,
518 .boottime.wakeup = WAKEUP_CLOCK_DATA,
519 .boottime.fd = -1,
520 .boottime.next = USEC_INFINITY,
521 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
522 .monotonic.fd = -1,
523 .monotonic.next = USEC_INFINITY,
524 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
525 .realtime_alarm.fd = -1,
526 .realtime_alarm.next = USEC_INFINITY,
527 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
528 .boottime_alarm.fd = -1,
529 .boottime_alarm.next = USEC_INFINITY,
530 .perturb = USEC_INFINITY,
531 .original_pid = getpid_cached(),
532 };
533
534 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
535 if (r < 0)
536 goto fail;
537
538 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
539 if (e->epoll_fd < 0) {
540 r = -errno;
541 goto fail;
542 }
543
544 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
545
546 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
547 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
548 e->profile_delays = true;
549 }
550
551 *ret = e;
552 return 0;
553
554 fail:
555 event_free(e);
556 return r;
557 }
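/* A minimal usage sketch of the constructor above (illustrative only; error handling trimmed):
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);                // or sd_event_default(&e) for the per-thread default loop
 *     if (r < 0)
 *         return r;
 *
 *     // ... attach event sources here ...
 *
 *     r = sd_event_loop(e);                // dispatches sources until sd_event_exit() is called
 *     sd_event_unref(e);
 *     return r;
 */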
558
559 _public_ sd_event* sd_event_ref(sd_event *e) {
560
561 if (!e)
562 return NULL;
563
564 assert(e->n_ref >= 1);
565 e->n_ref++;
566
567 return e;
568 }
569
570 _public_ sd_event* sd_event_unref(sd_event *e) {
571
572 if (!e)
573 return NULL;
574
575 assert(e->n_ref >= 1);
576 e->n_ref--;
577
578 if (e->n_ref <= 0)
579 event_free(e);
580
581 return NULL;
582 }
583
584 static bool event_pid_changed(sd_event *e) {
585 assert(e);
586
587 /* We don't support people creating an event loop and keeping
588 * it around over a fork(). Let's complain. */
589
590 return e->original_pid != getpid_cached();
591 }
592
593 static void source_io_unregister(sd_event_source *s) {
594 int r;
595
596 assert(s);
597 assert(s->type == SOURCE_IO);
598
599 if (event_pid_changed(s->event))
600 return;
601
602 if (!s->io.registered)
603 return;
604
605 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
606 if (r < 0)
607 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
608 strna(s->description), event_source_type_to_string(s->type));
609
610 s->io.registered = false;
611 }
612
613 static int source_io_register(
614 sd_event_source *s,
615 int enabled,
616 uint32_t events) {
617
618 struct epoll_event ev;
619 int r;
620
621 assert(s);
622 assert(s->type == SOURCE_IO);
623 assert(enabled != SD_EVENT_OFF);
624
625 ev = (struct epoll_event) {
626 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
627 .data.ptr = s,
628 };
629
630 if (s->io.registered)
631 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
632 else
633 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
634 if (r < 0)
635 return -errno;
636
637 s->io.registered = true;
638
639 return 0;
640 }
641
642 static clockid_t event_source_type_to_clock(EventSourceType t) {
643
644 switch (t) {
645
646 case SOURCE_TIME_REALTIME:
647 return CLOCK_REALTIME;
648
649 case SOURCE_TIME_BOOTTIME:
650 return CLOCK_BOOTTIME;
651
652 case SOURCE_TIME_MONOTONIC:
653 return CLOCK_MONOTONIC;
654
655 case SOURCE_TIME_REALTIME_ALARM:
656 return CLOCK_REALTIME_ALARM;
657
658 case SOURCE_TIME_BOOTTIME_ALARM:
659 return CLOCK_BOOTTIME_ALARM;
660
661 default:
662 return (clockid_t) -1;
663 }
664 }
665
666 static EventSourceType clock_to_event_source_type(clockid_t clock) {
667
668 switch (clock) {
669
670 case CLOCK_REALTIME:
671 return SOURCE_TIME_REALTIME;
672
673 case CLOCK_BOOTTIME:
674 return SOURCE_TIME_BOOTTIME;
675
676 case CLOCK_MONOTONIC:
677 return SOURCE_TIME_MONOTONIC;
678
679 case CLOCK_REALTIME_ALARM:
680 return SOURCE_TIME_REALTIME_ALARM;
681
682 case CLOCK_BOOTTIME_ALARM:
683 return SOURCE_TIME_BOOTTIME_ALARM;
684
685 default:
686 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
687 }
688 }
689
690 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
691 assert(e);
692
693 switch (t) {
694
695 case SOURCE_TIME_REALTIME:
696 return &e->realtime;
697
698 case SOURCE_TIME_BOOTTIME:
699 return &e->boottime;
700
701 case SOURCE_TIME_MONOTONIC:
702 return &e->monotonic;
703
704 case SOURCE_TIME_REALTIME_ALARM:
705 return &e->realtime_alarm;
706
707 case SOURCE_TIME_BOOTTIME_ALARM:
708 return &e->boottime_alarm;
709
710 default:
711 return NULL;
712 }
713 }
714
715 static int event_make_signal_data(
716 sd_event *e,
717 int sig,
718 struct signal_data **ret) {
719
720 struct epoll_event ev;
721 struct signal_data *d;
722 bool added = false;
723 sigset_t ss_copy;
724 int64_t priority;
725 int r;
726
727 assert(e);
728
729 if (event_pid_changed(e))
730 return -ECHILD;
731
732 if (e->signal_sources && e->signal_sources[sig])
733 priority = e->signal_sources[sig]->priority;
734 else
735 priority = SD_EVENT_PRIORITY_NORMAL;
736
737 d = hashmap_get(e->signal_data, &priority);
738 if (d) {
739 if (sigismember(&d->sigset, sig) > 0) {
740 if (ret)
741 *ret = d;
742 return 0;
743 }
744 } else {
745 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
746 if (r < 0)
747 return r;
748
749 d = new(struct signal_data, 1);
750 if (!d)
751 return -ENOMEM;
752
753 *d = (struct signal_data) {
754 .wakeup = WAKEUP_SIGNAL_DATA,
755 .fd = -1,
756 .priority = priority,
757 };
758
759 r = hashmap_put(e->signal_data, &d->priority, d);
760 if (r < 0) {
761 free(d);
762 return r;
763 }
764
765 added = true;
766 }
767
768 ss_copy = d->sigset;
769 assert_se(sigaddset(&ss_copy, sig) >= 0);
770
771 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
772 if (r < 0) {
773 r = -errno;
774 goto fail;
775 }
776
777 d->sigset = ss_copy;
778
779 if (d->fd >= 0) {
780 if (ret)
781 *ret = d;
782 return 0;
783 }
784
785 d->fd = fd_move_above_stdio(r);
786
787 ev = (struct epoll_event) {
788 .events = EPOLLIN,
789 .data.ptr = d,
790 };
791
792 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
793 if (r < 0) {
794 r = -errno;
795 goto fail;
796 }
797
798 if (ret)
799 *ret = d;
800
801 return 0;
802
803 fail:
804 if (added) {
805 d->fd = safe_close(d->fd);
806 hashmap_remove(e->signal_data, &d->priority);
807 free(d);
808 }
809
810 return r;
811 }
812
813 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
814 assert(e);
815 assert(d);
816
817 /* Turns off the specified signal in the signal data
818 * object. If the signal mask of the object becomes empty
819 * that way, the object is removed. */
820
821 if (sigismember(&d->sigset, sig) == 0)
822 return;
823
824 assert_se(sigdelset(&d->sigset, sig) >= 0);
825
826 if (sigisemptyset(&d->sigset)) {
827
828 /* If the mask is now empty we can get rid of the structure */
829 hashmap_remove(e->signal_data, &d->priority);
830 safe_close(d->fd);
831 free(d);
832 return;
833 }
834
835 assert(d->fd >= 0);
836
837 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
838 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
839 }
840
841 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
842 struct signal_data *d;
843 static const int64_t zero_priority = 0;
844
845 assert(e);
846
847 /* Rechecks if the specified signal is still something we are
848 * interested in. If not, we'll unmask it, and possibly drop
849 * the signalfd for it. */
850
851 if (sig == SIGCHLD &&
852 e->n_enabled_child_sources > 0)
853 return;
854
855 if (e->signal_sources &&
856 e->signal_sources[sig] &&
857 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
858 return;
859
860 /*
861 * The specified signal might be enabled in three different queues:
862 *
863 * 1) the one that belongs to the priority passed (if it is non-NULL)
864 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
865 * 3) the 0 priority (to cover the SIGCHLD case)
866 *
867 * Hence, let's remove it from all three here.
868 */
869
870 if (priority) {
871 d = hashmap_get(e->signal_data, priority);
872 if (d)
873 event_unmask_signal_data(e, d, sig);
874 }
875
876 if (e->signal_sources && e->signal_sources[sig]) {
877 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
878 if (d)
879 event_unmask_signal_data(e, d, sig);
880 }
881
882 d = hashmap_get(e->signal_data, &zero_priority);
883 if (d)
884 event_unmask_signal_data(e, d, sig);
885 }
886
887 static void source_disconnect(sd_event_source *s) {
888 sd_event *event;
889
890 assert(s);
891
892 if (!s->event)
893 return;
894
895 assert(s->event->n_sources > 0);
896
897 switch (s->type) {
898
899 case SOURCE_IO:
900 if (s->io.fd >= 0)
901 source_io_unregister(s);
902
903 break;
904
905 case SOURCE_TIME_REALTIME:
906 case SOURCE_TIME_BOOTTIME:
907 case SOURCE_TIME_MONOTONIC:
908 case SOURCE_TIME_REALTIME_ALARM:
909 case SOURCE_TIME_BOOTTIME_ALARM: {
910 struct clock_data *d;
911
912 d = event_get_clock_data(s->event, s->type);
913 assert(d);
914
915 prioq_remove(d->earliest, s, &s->time.earliest_index);
916 prioq_remove(d->latest, s, &s->time.latest_index);
917 d->needs_rearm = true;
918 break;
919 }
920
921 case SOURCE_SIGNAL:
922 if (s->signal.sig > 0) {
923
924 if (s->event->signal_sources)
925 s->event->signal_sources[s->signal.sig] = NULL;
926
927 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
928 }
929
930 break;
931
932 case SOURCE_CHILD:
933 if (s->child.pid > 0) {
934 if (s->enabled != SD_EVENT_OFF) {
935 assert(s->event->n_enabled_child_sources > 0);
936 s->event->n_enabled_child_sources--;
937 }
938
939 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
940 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
941 }
942
943 break;
944
945 case SOURCE_DEFER:
946 /* nothing */
947 break;
948
949 case SOURCE_POST:
950 set_remove(s->event->post_sources, s);
951 break;
952
953 case SOURCE_EXIT:
954 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
955 break;
956
957 case SOURCE_INOTIFY: {
958 struct inode_data *inode_data;
959
960 inode_data = s->inotify.inode_data;
961 if (inode_data) {
962 struct inotify_data *inotify_data;
963 assert_se(inotify_data = inode_data->inotify_data);
964
965 /* Detach this event source from the inode object */
966 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
967 s->inotify.inode_data = NULL;
968
969 if (s->pending) {
970 assert(inotify_data->n_pending > 0);
971 inotify_data->n_pending--;
972 }
973
974 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
975 * continues to be watched. That's because inotify doesn't really have an API for that: we
976 * can only change watch masks with access to the original inode, either by fd or by path. But
977 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
978 * continuously and keeping the mount busy, which we can't really do. We could reconstruct the
979 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
980 * there), but given the need for open_by_handle_at() which is privileged and not universally
981 * available this would be quite an incomplete solution. Hence we go the other way, leave the
982 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
983 * anymore after reception. Yes, this sucks, but … Linux … */
984
985 /* Maybe release the inode data (and its inotify) */
986 event_gc_inode_data(s->event, inode_data);
987 }
988
989 break;
990 }
991
992 default:
993 assert_not_reached("Wut? I shouldn't exist.");
994 }
995
996 if (s->pending)
997 prioq_remove(s->event->pending, s, &s->pending_index);
998
999 if (s->prepare)
1000 prioq_remove(s->event->prepare, s, &s->prepare_index);
1001
1002 event = s->event;
1003
1004 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
1005 s->event = NULL;
1006 LIST_REMOVE(sources, event->sources, s);
1007 event->n_sources--;
1008
1009 if (!s->floating)
1010 sd_event_unref(event);
1011 }
1012
1013 static void source_free(sd_event_source *s) {
1014 assert(s);
1015
1016 source_disconnect(s);
1017
1018 if (s->type == SOURCE_IO && s->io.owned)
1019 s->io.fd = safe_close(s->io.fd);
1020
1021 if (s->destroy_callback)
1022 s->destroy_callback(s->userdata);
1023
1024 free(s->description);
1025 free(s);
1026 }
1027
1028 static int source_set_pending(sd_event_source *s, bool b) {
1029 int r;
1030
1031 assert(s);
1032 assert(s->type != SOURCE_EXIT);
1033
1034 if (s->pending == b)
1035 return 0;
1036
1037 s->pending = b;
1038
1039 if (b) {
1040 s->pending_iteration = s->event->iteration;
1041
1042 r = prioq_put(s->event->pending, s, &s->pending_index);
1043 if (r < 0) {
1044 s->pending = false;
1045 return r;
1046 }
1047 } else
1048 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
1049
1050 if (EVENT_SOURCE_IS_TIME(s->type)) {
1051 struct clock_data *d;
1052
1053 d = event_get_clock_data(s->event, s->type);
1054 assert(d);
1055
1056 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1057 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1058 d->needs_rearm = true;
1059 }
1060
1061 if (s->type == SOURCE_SIGNAL && !b) {
1062 struct signal_data *d;
1063
1064 d = hashmap_get(s->event->signal_data, &s->priority);
1065 if (d && d->current == s)
1066 d->current = NULL;
1067 }
1068
1069 if (s->type == SOURCE_INOTIFY) {
1070
1071 assert(s->inotify.inode_data);
1072 assert(s->inotify.inode_data->inotify_data);
1073
1074 if (b)
1075 s->inotify.inode_data->inotify_data->n_pending++;
1076 else {
1077 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
1078 s->inotify.inode_data->inotify_data->n_pending--;
1079 }
1080 }
1081
1082 return 0;
1083 }
1084
1085 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
1086 sd_event_source *s;
1087
1088 assert(e);
1089
1090 s = new(sd_event_source, 1);
1091 if (!s)
1092 return NULL;
1093
1094 *s = (struct sd_event_source) {
1095 .n_ref = 1,
1096 .event = e,
1097 .floating = floating,
1098 .type = type,
1099 .pending_index = PRIOQ_IDX_NULL,
1100 .prepare_index = PRIOQ_IDX_NULL,
1101 };
1102
1103 if (!floating)
1104 sd_event_ref(e);
1105
1106 LIST_PREPEND(sources, e->sources, s);
1107 e->n_sources++;
1108
1109 return s;
1110 }
1111
1112 _public_ int sd_event_add_io(
1113 sd_event *e,
1114 sd_event_source **ret,
1115 int fd,
1116 uint32_t events,
1117 sd_event_io_handler_t callback,
1118 void *userdata) {
1119
1120 sd_event_source *s;
1121 int r;
1122
1123 assert_return(e, -EINVAL);
1124 assert_return(e = event_resolve(e), -ENOPKG);
1125 assert_return(fd >= 0, -EBADF);
1126 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1127 assert_return(callback, -EINVAL);
1128 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1129 assert_return(!event_pid_changed(e), -ECHILD);
1130
1131 s = source_new(e, !ret, SOURCE_IO);
1132 if (!s)
1133 return -ENOMEM;
1134
1135 s->wakeup = WAKEUP_EVENT_SOURCE;
1136 s->io.fd = fd;
1137 s->io.events = events;
1138 s->io.callback = callback;
1139 s->userdata = userdata;
1140 s->enabled = SD_EVENT_ON;
1141
1142 r = source_io_register(s, s->enabled, events);
1143 if (r < 0) {
1144 source_free(s);
1145 return r;
1146 }
1147
1148 if (ret)
1149 *ret = s;
1150
1151 return 0;
1152 }
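/* A minimal usage sketch for the I/O source added above (illustrative only; "sock_fd" and
 * "on_readable" are made-up names):
 *
 *     static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *         char buf[4096];
 *         ssize_t n = read(fd, buf, sizeof(buf));    // level-triggered by default; read until EAGAIN or request EPOLLET
 *         return n < 0 && errno != EAGAIN ? -errno : 0;
 *     }
 *
 *     r = sd_event_add_io(e, &source, sock_fd, EPOLLIN, on_readable, NULL);
 */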
1153
1154 static void initialize_perturb(sd_event *e) {
1155 sd_id128_t bootid = {};
1156
1157 /* When we sleep for longer, we try to realign the wakeup to
1158 the same time within each minute/second/250ms, so that
1159 events all across the system can be coalesced into a single
1160 CPU wakeup. However, let's take some system-specific
1161 randomness for this value, so that in a network of systems
1162 with synced clocks timer events are distributed a
1163 bit. Here, we calculate a perturbation usec offset from the
1164 boot ID. */
1165
1166 if (_likely_(e->perturb != USEC_INFINITY))
1167 return;
1168
1169 if (sd_id128_get_boot(&bootid) >= 0)
1170 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
1171 }
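/* A simplified sketch of how the perturbation is meant to be used when arming timers (the
 * real arming code lives further down in this file; granularity "g" and window [a, b] below
 * are illustrative):
 *
 *     // Align the wakeup within [a, b] to the boot-specific offset, modulo the granularity,
 *     // so independent services on the same machine tend to wake up at the same instant:
 *     usec_t c = (b / g) * g + (e->perturb % g);
 *     if (c > b || c < a)
 *         c = b;          // aligned point misses the window: fall back to the latest time
 */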
1172
1173 static int event_setup_timer_fd(
1174 sd_event *e,
1175 struct clock_data *d,
1176 clockid_t clock) {
1177
1178 struct epoll_event ev;
1179 int r, fd;
1180
1181 assert(e);
1182 assert(d);
1183
1184 if (_likely_(d->fd >= 0))
1185 return 0;
1186
1187 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
1188 if (fd < 0)
1189 return -errno;
1190
1191 fd = fd_move_above_stdio(fd);
1192
1193 ev = (struct epoll_event) {
1194 .events = EPOLLIN,
1195 .data.ptr = d,
1196 };
1197
1198 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
1199 if (r < 0) {
1200 safe_close(fd);
1201 return -errno;
1202 }
1203
1204 d->fd = fd;
1205 return 0;
1206 }
1207
1208 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
1209 assert(s);
1210
1211 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1212 }
1213
1214 _public_ int sd_event_add_time(
1215 sd_event *e,
1216 sd_event_source **ret,
1217 clockid_t clock,
1218 uint64_t usec,
1219 uint64_t accuracy,
1220 sd_event_time_handler_t callback,
1221 void *userdata) {
1222
1223 EventSourceType type;
1224 sd_event_source *s;
1225 struct clock_data *d;
1226 int r;
1227
1228 assert_return(e, -EINVAL);
1229 assert_return(e = event_resolve(e), -ENOPKG);
1230 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1231 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1232 assert_return(!event_pid_changed(e), -ECHILD);
1233
1234 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1235 return -EOPNOTSUPP;
1236
1237 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1238 if (type < 0)
1239 return -EOPNOTSUPP;
1240
1241 if (!callback)
1242 callback = time_exit_callback;
1243
1244 d = event_get_clock_data(e, type);
1245 assert(d);
1246
1247 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1248 if (r < 0)
1249 return r;
1250
1251 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1252 if (r < 0)
1253 return r;
1254
1255 if (d->fd < 0) {
1256 r = event_setup_timer_fd(e, d, clock);
1257 if (r < 0)
1258 return r;
1259 }
1260
1261 s = source_new(e, !ret, type);
1262 if (!s)
1263 return -ENOMEM;
1264
1265 s->time.next = usec;
1266 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1267 s->time.callback = callback;
1268 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1269 s->userdata = userdata;
1270 s->enabled = SD_EVENT_ONESHOT;
1271
1272 d->needs_rearm = true;
1273
1274 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1275 if (r < 0)
1276 goto fail;
1277
1278 r = prioq_put(d->latest, s, &s->time.latest_index);
1279 if (r < 0)
1280 goto fail;
1281
1282 if (ret)
1283 *ret = s;
1284
1285 return 0;
1286
1287 fail:
1288 source_free(s);
1289 return r;
1290 }
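/* A minimal usage sketch for a relative timer on top of the function above (illustrative
 * only; "on_timer" and the 5s delay are made up):
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *         return 0;        // added as SD_EVENT_ONESHOT above, so it won't fire again unless re-armed
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *         return r;
 *     r = sd_event_add_time(e, &source, CLOCK_MONOTONIC,
 *                           now_usec + 5 * USEC_PER_SEC,   // fire in ~5s
 *                           0,                             // accuracy 0 == DEFAULT_ACCURACY_USEC (250ms)
 *                           on_timer, NULL);
 */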
1291
1292 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1293 assert(s);
1294
1295 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1296 }
1297
1298 _public_ int sd_event_add_signal(
1299 sd_event *e,
1300 sd_event_source **ret,
1301 int sig,
1302 sd_event_signal_handler_t callback,
1303 void *userdata) {
1304
1305 sd_event_source *s;
1306 struct signal_data *d;
1307 sigset_t ss;
1308 int r;
1309
1310 assert_return(e, -EINVAL);
1311 assert_return(e = event_resolve(e), -ENOPKG);
1312 assert_return(SIGNAL_VALID(sig), -EINVAL);
1313 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1314 assert_return(!event_pid_changed(e), -ECHILD);
1315
1316 if (!callback)
1317 callback = signal_exit_callback;
1318
1319 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1320 if (r != 0)
1321 return -r;
1322
1323 if (!sigismember(&ss, sig))
1324 return -EBUSY;
1325
1326 if (!e->signal_sources) {
1327 e->signal_sources = new0(sd_event_source*, _NSIG);
1328 if (!e->signal_sources)
1329 return -ENOMEM;
1330 } else if (e->signal_sources[sig])
1331 return -EBUSY;
1332
1333 s = source_new(e, !ret, SOURCE_SIGNAL);
1334 if (!s)
1335 return -ENOMEM;
1336
1337 s->signal.sig = sig;
1338 s->signal.callback = callback;
1339 s->userdata = userdata;
1340 s->enabled = SD_EVENT_ON;
1341
1342 e->signal_sources[sig] = s;
1343
1344 r = event_make_signal_data(e, sig, &d);
1345 if (r < 0) {
1346 source_free(s);
1347 return r;
1348 }
1349
1350 /* Use the signal name as description for the event source by default */
1351 (void) sd_event_source_set_description(s, signal_to_string(sig));
1352
1353 if (ret)
1354 *ret = s;
1355
1356 return 0;
1357 }
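/* A minimal usage sketch: the function above requires the signal to be blocked already
 * (otherwise it returns -EBUSY), so callers typically do something like (illustrative only):
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
 *         return -errno;
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);  // NULL callback: exit the loop on SIGTERM
 */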
1358
1359 _public_ int sd_event_add_child(
1360 sd_event *e,
1361 sd_event_source **ret,
1362 pid_t pid,
1363 int options,
1364 sd_event_child_handler_t callback,
1365 void *userdata) {
1366
1367 sd_event_source *s;
1368 int r;
1369
1370 assert_return(e, -EINVAL);
1371 assert_return(e = event_resolve(e), -ENOPKG);
1372 assert_return(pid > 1, -EINVAL);
1373 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1374 assert_return(options != 0, -EINVAL);
1375 assert_return(callback, -EINVAL);
1376 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1377 assert_return(!event_pid_changed(e), -ECHILD);
1378
1379 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1380 if (r < 0)
1381 return r;
1382
1383 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1384 return -EBUSY;
1385
1386 s = source_new(e, !ret, SOURCE_CHILD);
1387 if (!s)
1388 return -ENOMEM;
1389
1390 s->child.pid = pid;
1391 s->child.options = options;
1392 s->child.callback = callback;
1393 s->userdata = userdata;
1394 s->enabled = SD_EVENT_ONESHOT;
1395
1396 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1397 if (r < 0) {
1398 source_free(s);
1399 return r;
1400 }
1401
1402 e->n_enabled_child_sources++;
1403
1404 r = event_make_signal_data(e, SIGCHLD, NULL);
1405 if (r < 0) {
1406 e->n_enabled_child_sources--;
1407 source_free(s);
1408 return r;
1409 }
1410
1411 e->need_process_child = true;
1412
1413 if (ret)
1414 *ret = s;
1415
1416 return 0;
1417 }
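/* A minimal usage sketch for the child source added above (illustrative only; "on_child" and
 * "child_main" are made-up names). SIGCHLD needs to be blocked for the signalfd-based
 * delivery to work:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *         // si->si_pid, si->si_code, si->si_status describe the state change
 *         return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGCHLD);
 *     (void) sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *         _exit(child_main());
 *     r = sd_event_add_child(e, &source, pid, WEXITED, on_child, NULL);
 */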
1418
1419 _public_ int sd_event_add_defer(
1420 sd_event *e,
1421 sd_event_source **ret,
1422 sd_event_handler_t callback,
1423 void *userdata) {
1424
1425 sd_event_source *s;
1426 int r;
1427
1428 assert_return(e, -EINVAL);
1429 assert_return(e = event_resolve(e), -ENOPKG);
1430 assert_return(callback, -EINVAL);
1431 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1432 assert_return(!event_pid_changed(e), -ECHILD);
1433
1434 s = source_new(e, !ret, SOURCE_DEFER);
1435 if (!s)
1436 return -ENOMEM;
1437
1438 s->defer.callback = callback;
1439 s->userdata = userdata;
1440 s->enabled = SD_EVENT_ONESHOT;
1441
1442 r = source_set_pending(s, true);
1443 if (r < 0) {
1444 source_free(s);
1445 return r;
1446 }
1447
1448 if (ret)
1449 *ret = s;
1450
1451 return 0;
1452 }
1453
1454 _public_ int sd_event_add_post(
1455 sd_event *e,
1456 sd_event_source **ret,
1457 sd_event_handler_t callback,
1458 void *userdata) {
1459
1460 sd_event_source *s;
1461 int r;
1462
1463 assert_return(e, -EINVAL);
1464 assert_return(e = event_resolve(e), -ENOPKG);
1465 assert_return(callback, -EINVAL);
1466 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1467 assert_return(!event_pid_changed(e), -ECHILD);
1468
1469 r = set_ensure_allocated(&e->post_sources, NULL);
1470 if (r < 0)
1471 return r;
1472
1473 s = source_new(e, !ret, SOURCE_POST);
1474 if (!s)
1475 return -ENOMEM;
1476
1477 s->post.callback = callback;
1478 s->userdata = userdata;
1479 s->enabled = SD_EVENT_ON;
1480
1481 r = set_put(e->post_sources, s);
1482 if (r < 0) {
1483 source_free(s);
1484 return r;
1485 }
1486
1487 if (ret)
1488 *ret = s;
1489
1490 return 0;
1491 }
1492
1493 _public_ int sd_event_add_exit(
1494 sd_event *e,
1495 sd_event_source **ret,
1496 sd_event_handler_t callback,
1497 void *userdata) {
1498
1499 sd_event_source *s;
1500 int r;
1501
1502 assert_return(e, -EINVAL);
1503 assert_return(e = event_resolve(e), -ENOPKG);
1504 assert_return(callback, -EINVAL);
1505 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1506 assert_return(!event_pid_changed(e), -ECHILD);
1507
1508 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1509 if (r < 0)
1510 return r;
1511
1512 s = source_new(e, !ret, SOURCE_EXIT);
1513 if (!s)
1514 return -ENOMEM;
1515
1516 s->exit.callback = callback;
1517 s->userdata = userdata;
1518 s->exit.prioq_index = PRIOQ_IDX_NULL;
1519 s->enabled = SD_EVENT_ONESHOT;
1520
1521 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1522 if (r < 0) {
1523 source_free(s);
1524 return r;
1525 }
1526
1527 if (ret)
1528 *ret = s;
1529
1530 return 0;
1531 }
1532
1533 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1534 assert(e);
1535
1536 if (!d)
1537 return;
1538
1539 assert(hashmap_isempty(d->inodes));
1540 assert(hashmap_isempty(d->wd));
1541
1542 if (d->buffer_filled > 0)
1543 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1544
1545 hashmap_free(d->inodes);
1546 hashmap_free(d->wd);
1547
1548 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1549
1550 if (d->fd >= 0) {
1551 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1552 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1553
1554 safe_close(d->fd);
1555 }
1556 free(d);
1557 }
1558
1559 static int event_make_inotify_data(
1560 sd_event *e,
1561 int64_t priority,
1562 struct inotify_data **ret) {
1563
1564 _cleanup_close_ int fd = -1;
1565 struct inotify_data *d;
1566 struct epoll_event ev;
1567 int r;
1568
1569 assert(e);
1570
1571 d = hashmap_get(e->inotify_data, &priority);
1572 if (d) {
1573 if (ret)
1574 *ret = d;
1575 return 0;
1576 }
1577
1578 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1579 if (fd < 0)
1580 return -errno;
1581
1582 fd = fd_move_above_stdio(fd);
1583
1584 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1585 if (r < 0)
1586 return r;
1587
1588 d = new(struct inotify_data, 1);
1589 if (!d)
1590 return -ENOMEM;
1591
1592 *d = (struct inotify_data) {
1593 .wakeup = WAKEUP_INOTIFY_DATA,
1594 .fd = TAKE_FD(fd),
1595 .priority = priority,
1596 };
1597
1598 r = hashmap_put(e->inotify_data, &d->priority, d);
1599 if (r < 0) {
1600 d->fd = safe_close(d->fd);
1601 free(d);
1602 return r;
1603 }
1604
1605 ev = (struct epoll_event) {
1606 .events = EPOLLIN,
1607 .data.ptr = d,
1608 };
1609
1610 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1611 r = -errno;
1612 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1613 * remove the fd from the epoll first, which we don't want as we couldn't
1614 * add it in the first place. */
1615 event_free_inotify_data(e, d);
1616 return r;
1617 }
1618
1619 if (ret)
1620 *ret = d;
1621
1622 return 1;
1623 }
1624
1625 static int inode_data_compare(const void *a, const void *b) {
1626 const struct inode_data *x = a, *y = b;
1627
1628 assert(x);
1629 assert(y);
1630
1631 if (x->dev < y->dev)
1632 return -1;
1633 if (x->dev > y->dev)
1634 return 1;
1635
1636 if (x->ino < y->ino)
1637 return -1;
1638 if (x->ino > y->ino)
1639 return 1;
1640
1641 return 0;
1642 }
1643
1644 static void inode_data_hash_func(const void *p, struct siphash *state) {
1645 const struct inode_data *d = p;
1646
1647 assert(p);
1648
1649 siphash24_compress(&d->dev, sizeof(d->dev), state);
1650 siphash24_compress(&d->ino, sizeof(d->ino), state);
1651 }
1652
1653 const struct hash_ops inode_data_hash_ops = {
1654 .hash = inode_data_hash_func,
1655 .compare = inode_data_compare
1656 };
1657
1658 static void event_free_inode_data(
1659 sd_event *e,
1660 struct inode_data *d) {
1661
1662 assert(e);
1663
1664 if (!d)
1665 return;
1666
1667 assert(!d->event_sources);
1668
1669 if (d->fd >= 0) {
1670 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1671 safe_close(d->fd);
1672 }
1673
1674 if (d->inotify_data) {
1675
1676 if (d->wd >= 0) {
1677 if (d->inotify_data->fd >= 0) {
1678 /* So here's a problem. At the time this runs the watch descriptor might already be
1679 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1680 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's quite
1681 * likely to happen. */
1682
1683 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1684 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1685 }
1686
1687 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1688 }
1689
1690 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1691 }
1692
1693 free(d);
1694 }
1695
1696 static void event_gc_inode_data(
1697 sd_event *e,
1698 struct inode_data *d) {
1699
1700 struct inotify_data *inotify_data;
1701
1702 assert(e);
1703
1704 if (!d)
1705 return;
1706
1707 if (d->event_sources)
1708 return;
1709
1710 inotify_data = d->inotify_data;
1711 event_free_inode_data(e, d);
1712
1713 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1714 event_free_inotify_data(e, inotify_data);
1715 }
1716
1717 static int event_make_inode_data(
1718 sd_event *e,
1719 struct inotify_data *inotify_data,
1720 dev_t dev,
1721 ino_t ino,
1722 struct inode_data **ret) {
1723
1724 struct inode_data *d, key;
1725 int r;
1726
1727 assert(e);
1728 assert(inotify_data);
1729
1730 key = (struct inode_data) {
1731 .ino = ino,
1732 .dev = dev,
1733 };
1734
1735 d = hashmap_get(inotify_data->inodes, &key);
1736 if (d) {
1737 if (ret)
1738 *ret = d;
1739
1740 return 0;
1741 }
1742
1743 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1744 if (r < 0)
1745 return r;
1746
1747 d = new(struct inode_data, 1);
1748 if (!d)
1749 return -ENOMEM;
1750
1751 *d = (struct inode_data) {
1752 .dev = dev,
1753 .ino = ino,
1754 .wd = -1,
1755 .fd = -1,
1756 .inotify_data = inotify_data,
1757 };
1758
1759 r = hashmap_put(inotify_data->inodes, d, d);
1760 if (r < 0) {
1761 free(d);
1762 return r;
1763 }
1764
1765 if (ret)
1766 *ret = d;
1767
1768 return 1;
1769 }
1770
1771 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1772 bool excl_unlink = true;
1773 uint32_t combined = 0;
1774 sd_event_source *s;
1775
1776 assert(d);
1777
1778 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1779 * the IN_EXCL_UNLINK flag is ANDed instead.
1780 *
1781 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1782 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1783 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1784 * events we don't care about client-side. */
1785
1786 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1787
1788 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1789 excl_unlink = false;
1790
1791 combined |= s->inotify.mask;
1792 }
1793
1794 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1795 }
1796
1797 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1798 uint32_t combined_mask;
1799 int wd, r;
1800
1801 assert(d);
1802 assert(d->fd >= 0);
1803
1804 combined_mask = inode_data_determine_mask(d);
1805
1806 if (d->wd >= 0 && combined_mask == d->combined_mask)
1807 return 0;
1808
1809 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1810 if (r < 0)
1811 return r;
1812
1813 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1814 if (wd < 0)
1815 return -errno;
1816
1817 if (d->wd < 0) {
1818 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1819 if (r < 0) {
1820 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1821 return r;
1822 }
1823
1824 d->wd = wd;
1825
1826 } else if (d->wd != wd) {
1827
1828 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1829 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1830 return -EINVAL;
1831 }
1832
1833 d->combined_mask = combined_mask;
1834 return 1;
1835 }
1836
1837 _public_ int sd_event_add_inotify(
1838 sd_event *e,
1839 sd_event_source **ret,
1840 const char *path,
1841 uint32_t mask,
1842 sd_event_inotify_handler_t callback,
1843 void *userdata) {
1844
1845 bool rm_inotify = false, rm_inode = false;
1846 struct inotify_data *inotify_data = NULL;
1847 struct inode_data *inode_data = NULL;
1848 _cleanup_close_ int fd = -1;
1849 sd_event_source *s;
1850 struct stat st;
1851 int r;
1852
1853 assert_return(e, -EINVAL);
1854 assert_return(e = event_resolve(e), -ENOPKG);
1855 assert_return(path, -EINVAL);
1856 assert_return(callback, -EINVAL);
1857 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1858 assert_return(!event_pid_changed(e), -ECHILD);
1859
1860 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1861 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD-type behaviour for you, hence
1862 * the caller can't use this flag directly. */
1863 if (mask & IN_MASK_ADD)
1864 return -EINVAL;
1865
1866 fd = open(path, O_PATH|O_CLOEXEC|
1867 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1868 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1869 if (fd < 0)
1870 return -errno;
1871
1872 if (fstat(fd, &st) < 0)
1873 return -errno;
1874
1875 s = source_new(e, !ret, SOURCE_INOTIFY);
1876 if (!s)
1877 return -ENOMEM;
1878
1879 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1880 s->inotify.mask = mask;
1881 s->inotify.callback = callback;
1882 s->userdata = userdata;
1883
1884 /* Allocate an inotify object for this priority, and an inode object within it */
1885 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1886 if (r < 0)
1887 goto fail;
1888 rm_inotify = r > 0;
1889
1890 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1891 if (r < 0)
1892 goto fail;
1893 rm_inode = r > 0;
1894
1895 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority
1896 * of the event source until then; changing it requires access to the original inode. */
1897 if (inode_data->fd < 0) {
1898 inode_data->fd = TAKE_FD(fd);
1899 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1900 }
1901
1902 /* Link our event source to the inode data object */
1903 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1904 s->inotify.inode_data = inode_data;
1905
1906 rm_inode = rm_inotify = false;
1907
1908 /* Actually realize the watch now */
1909 r = inode_data_realize_watch(e, inode_data);
1910 if (r < 0)
1911 goto fail;
1912
1913 (void) sd_event_source_set_description(s, path);
1914
1915 if (ret)
1916 *ret = s;
1917
1918 return 0;
1919
1920 fail:
1921 source_free(s);
1922
1923 if (rm_inode)
1924 event_free_inode_data(e, inode_data);
1925
1926 if (rm_inotify)
1927 event_free_inotify_data(e, inotify_data);
1928
1929 return r;
1930 }
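/* A minimal usage sketch for the inotify source added above (illustrative only; the path and
 * "on_inotify" are made up):
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *         if (ev->len > 0)
 *             log_debug("Saw inotify event on \"%s\"", ev->name);
 *         return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &source, "/run/example",          // hypothetical path
 *                              IN_CREATE|IN_MOVED_TO|IN_ONLYDIR,
 *                              on_inotify, NULL);
 */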
1931
1932 _public_ sd_event_source* sd_event_source_ref(sd_event_source *s) {
1933
1934 if (!s)
1935 return NULL;
1936
1937 assert(s->n_ref >= 1);
1938 s->n_ref++;
1939
1940 return s;
1941 }
1942
1943 _public_ sd_event_source* sd_event_source_unref(sd_event_source *s) {
1944
1945 if (!s)
1946 return NULL;
1947
1948 assert(s->n_ref >= 1);
1949 s->n_ref--;
1950
1951 if (s->n_ref <= 0) {
1952 /* Here's a special hack: when we are called from a
1953 * dispatch handler we won't free the event source
1954 * immediately, but we will detach the fd from the
1955 * epoll. This way it is safe for the caller to unref
1956 * the event source and immediately close the fd, but
1957 * we still retain a valid event source object after
1958 * the callback. */
1959
1960 if (s->dispatching) {
1961 if (s->type == SOURCE_IO)
1962 source_io_unregister(s);
1963
1964 source_disconnect(s);
1965 } else
1966 source_free(s);
1967 }
1968
1969 return NULL;
1970 }
1971
1972 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1973 assert_return(s, -EINVAL);
1974 assert_return(!event_pid_changed(s->event), -ECHILD);
1975
1976 return free_and_strdup(&s->description, description);
1977 }
1978
1979 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1980 assert_return(s, -EINVAL);
1981 assert_return(description, -EINVAL);
1982 assert_return(s->description, -ENXIO);
1983 assert_return(!event_pid_changed(s->event), -ECHILD);
1984
1985 *description = s->description;
1986 return 0;
1987 }
1988
1989 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1990 assert_return(s, NULL);
1991
1992 return s->event;
1993 }
1994
1995 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1996 assert_return(s, -EINVAL);
1997 assert_return(s->type != SOURCE_EXIT, -EDOM);
1998 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1999 assert_return(!event_pid_changed(s->event), -ECHILD);
2000
2001 return s->pending;
2002 }
2003
2004 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
2005 assert_return(s, -EINVAL);
2006 assert_return(s->type == SOURCE_IO, -EDOM);
2007 assert_return(!event_pid_changed(s->event), -ECHILD);
2008
2009 return s->io.fd;
2010 }
2011
2012 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
2013 int r;
2014
2015 assert_return(s, -EINVAL);
2016 assert_return(fd >= 0, -EBADF);
2017 assert_return(s->type == SOURCE_IO, -EDOM);
2018 assert_return(!event_pid_changed(s->event), -ECHILD);
2019
2020 if (s->io.fd == fd)
2021 return 0;
2022
2023 if (s->enabled == SD_EVENT_OFF) {
2024 s->io.fd = fd;
2025 s->io.registered = false;
2026 } else {
2027 int saved_fd;
2028
2029 saved_fd = s->io.fd;
2030 assert(s->io.registered);
2031
2032 s->io.fd = fd;
2033 s->io.registered = false;
2034
2035 r = source_io_register(s, s->enabled, s->io.events);
2036 if (r < 0) {
2037 s->io.fd = saved_fd;
2038 s->io.registered = true;
2039 return r;
2040 }
2041
2042 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
2043 }
2044
2045 return 0;
2046 }
2047
2048 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
2049 assert_return(s, -EINVAL);
2050 assert_return(s->type == SOURCE_IO, -EDOM);
2051
2052 return s->io.owned;
2053 }
2054
2055 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
2056 assert_return(s, -EINVAL);
2057 assert_return(s->type == SOURCE_IO, -EDOM);
2058
2059 s->io.owned = own;
2060 return 0;
2061 }
2062
2063 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
2064 assert_return(s, -EINVAL);
2065 assert_return(events, -EINVAL);
2066 assert_return(s->type == SOURCE_IO, -EDOM);
2067 assert_return(!event_pid_changed(s->event), -ECHILD);
2068
2069 *events = s->io.events;
2070 return 0;
2071 }
2072
2073 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
2074 int r;
2075
2076 assert_return(s, -EINVAL);
2077 assert_return(s->type == SOURCE_IO, -EDOM);
2078 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
2079 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2080 assert_return(!event_pid_changed(s->event), -ECHILD);
2081
2082 /* edge-triggered updates are never skipped, so we can reset edges */
2083 if (s->io.events == events && !(events & EPOLLET))
2084 return 0;
2085
2086 r = source_set_pending(s, false);
2087 if (r < 0)
2088 return r;
2089
2090 if (s->enabled != SD_EVENT_OFF) {
2091 r = source_io_register(s, s->enabled, events);
2092 if (r < 0)
2093 return r;
2094 }
2095
2096 s->io.events = events;
2097
2098 return 0;
2099 }
2100
2101 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
2102 assert_return(s, -EINVAL);
2103 assert_return(revents, -EINVAL);
2104 assert_return(s->type == SOURCE_IO, -EDOM);
2105 assert_return(s->pending, -ENODATA);
2106 assert_return(!event_pid_changed(s->event), -ECHILD);
2107
2108 *revents = s->io.revents;
2109 return 0;
2110 }
2111
2112 _public_ int sd_event_source_get_signal(sd_event_source *s) {
2113 assert_return(s, -EINVAL);
2114 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
2115 assert_return(!event_pid_changed(s->event), -ECHILD);
2116
2117 return s->signal.sig;
2118 }
2119
2120 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
2121 assert_return(s, -EINVAL);
2122 assert_return(!event_pid_changed(s->event), -ECHILD);
2123
2124 *priority = s->priority;
2125 return 0;
2126 }
2127
2128 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
2129 bool rm_inotify = false, rm_inode = false;
2130 struct inotify_data *new_inotify_data = NULL;
2131 struct inode_data *new_inode_data = NULL;
2132 int r;
2133
2134 assert_return(s, -EINVAL);
2135 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2136 assert_return(!event_pid_changed(s->event), -ECHILD);
2137
2138 if (s->priority == priority)
2139 return 0;
2140
2141 if (s->type == SOURCE_INOTIFY) {
2142 struct inode_data *old_inode_data;
2143
2144 assert(s->inotify.inode_data);
2145 old_inode_data = s->inotify.inode_data;
2146
2147 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
2148 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
2149 * events we allow priority changes only until the first following iteration. */
2150 if (old_inode_data->fd < 0)
2151 return -EOPNOTSUPP;
2152
2153 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
2154 if (r < 0)
2155 return r;
2156 rm_inotify = r > 0;
2157
2158 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
2159 if (r < 0)
2160 goto fail;
2161 rm_inode = r > 0;
2162
2163 if (new_inode_data->fd < 0) {
2164 /* Duplicate the fd for the new inode object if we don't have any yet */
2165 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
2166 if (new_inode_data->fd < 0) {
2167 r = -errno;
2168 goto fail;
2169 }
2170
2171 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
2172 }
2173
2174 /* Move the event source to the new inode data structure */
2175 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
2176 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
2177 s->inotify.inode_data = new_inode_data;
2178
2179 /* Now create the new watch */
2180 r = inode_data_realize_watch(s->event, new_inode_data);
2181 if (r < 0) {
2182 /* Move it back */
2183 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
2184 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
2185 s->inotify.inode_data = old_inode_data;
2186 goto fail;
2187 }
2188
2189 s->priority = priority;
2190
2191 event_gc_inode_data(s->event, old_inode_data);
2192
2193 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
2194 struct signal_data *old, *d;
2195
2196 /* Move us from the signalfd belonging to the old
2197 * priority to the signalfd of the new priority */
2198
2199 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
2200
2201 s->priority = priority;
2202
2203 r = event_make_signal_data(s->event, s->signal.sig, &d);
2204 if (r < 0) {
2205 s->priority = old->priority;
2206 return r;
2207 }
2208
2209 event_unmask_signal_data(s->event, old, s->signal.sig);
2210 } else
2211 s->priority = priority;
2212
2213 if (s->pending)
2214 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2215
2216 if (s->prepare)
2217 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2218
2219 if (s->type == SOURCE_EXIT)
2220 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2221
2222 return 0;
2223
2224 fail:
2225 if (rm_inode)
2226 event_free_inode_data(s->event, new_inode_data);
2227
2228 if (rm_inotify)
2229 event_free_inotify_data(s->event, new_inotify_data);
2230
2231 return r;
2232 }
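/*
 * Illustrative sketch (not part of this file): giving a signal source a higher
 * dispatch priority than ordinary sources with sd_event_source_set_priority().
 * The helper name and handler are hypothetical; blocking the signal first is a
 * documented requirement for signal sources.
 */
#include <signal.h>
#include <systemd/sd-event.h>

static int example_add_important_sigterm(sd_event *e, sd_event_signal_handler_t on_sigterm) {
        sd_event_source *s = NULL;
        sigset_t mask;
        int r;

        sigemptyset(&mask);
        sigaddset(&mask, SIGTERM);
        (void) sigprocmask(SIG_BLOCK, &mask, NULL);

        r = sd_event_add_signal(e, &s, SIGTERM, on_sigterm, NULL);
        if (r < 0)
                return r;

        /* Numerically lower priorities are dispatched first. */
        return sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IMPORTANT);
}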
2233
2234 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
2235 assert_return(s, -EINVAL);
2236 assert_return(m, -EINVAL);
2237 assert_return(!event_pid_changed(s->event), -ECHILD);
2238
2239 *m = s->enabled;
2240 return 0;
2241 }
2242
2243 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
2244 int r;
2245
2246 assert_return(s, -EINVAL);
2247 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
2248 assert_return(!event_pid_changed(s->event), -ECHILD);
2249
2250 /* If we are dead anyway, we are fine with turning off
2251 * sources, but everything else needs to fail. */
2252 if (s->event->state == SD_EVENT_FINISHED)
2253 return m == SD_EVENT_OFF ? 0 : -ESTALE;
2254
2255 if (s->enabled == m)
2256 return 0;
2257
2258 if (m == SD_EVENT_OFF) {
2259
2260 /* Unset the pending flag when this event source is disabled */
2261 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2262 r = source_set_pending(s, false);
2263 if (r < 0)
2264 return r;
2265 }
2266
2267 switch (s->type) {
2268
2269 case SOURCE_IO:
2270 source_io_unregister(s);
2271 s->enabled = m;
2272 break;
2273
2274 case SOURCE_TIME_REALTIME:
2275 case SOURCE_TIME_BOOTTIME:
2276 case SOURCE_TIME_MONOTONIC:
2277 case SOURCE_TIME_REALTIME_ALARM:
2278 case SOURCE_TIME_BOOTTIME_ALARM: {
2279 struct clock_data *d;
2280
2281 s->enabled = m;
2282 d = event_get_clock_data(s->event, s->type);
2283 assert(d);
2284
2285 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2286 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2287 d->needs_rearm = true;
2288 break;
2289 }
2290
2291 case SOURCE_SIGNAL:
2292 s->enabled = m;
2293
2294 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2295 break;
2296
2297 case SOURCE_CHILD:
2298 s->enabled = m;
2299
2300 assert(s->event->n_enabled_child_sources > 0);
2301 s->event->n_enabled_child_sources--;
2302
2303 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2304 break;
2305
2306 case SOURCE_EXIT:
2307 s->enabled = m;
2308 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2309 break;
2310
2311 case SOURCE_DEFER:
2312 case SOURCE_POST:
2313 case SOURCE_INOTIFY:
2314 s->enabled = m;
2315 break;
2316
2317 default:
2318 assert_not_reached("Wut? I shouldn't exist.");
2319 }
2320
2321 } else {
2322
2323 /* Unset the pending flag when this event source is enabled */
2324 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2325 r = source_set_pending(s, false);
2326 if (r < 0)
2327 return r;
2328 }
2329
2330 switch (s->type) {
2331
2332 case SOURCE_IO:
2333 r = source_io_register(s, m, s->io.events);
2334 if (r < 0)
2335 return r;
2336
2337 s->enabled = m;
2338 break;
2339
2340 case SOURCE_TIME_REALTIME:
2341 case SOURCE_TIME_BOOTTIME:
2342 case SOURCE_TIME_MONOTONIC:
2343 case SOURCE_TIME_REALTIME_ALARM:
2344 case SOURCE_TIME_BOOTTIME_ALARM: {
2345 struct clock_data *d;
2346
2347 s->enabled = m;
2348 d = event_get_clock_data(s->event, s->type);
2349 assert(d);
2350
2351 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2352 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2353 d->needs_rearm = true;
2354 break;
2355 }
2356
2357 case SOURCE_SIGNAL:
2358
2359 s->enabled = m;
2360
2361 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2362 if (r < 0) {
2363 s->enabled = SD_EVENT_OFF;
2364 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2365 return r;
2366 }
2367
2368 break;
2369
2370 case SOURCE_CHILD:
2371
2372 if (s->enabled == SD_EVENT_OFF)
2373 s->event->n_enabled_child_sources++;
2374
2375 s->enabled = m;
2376
2377 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2378 if (r < 0) {
2379 s->enabled = SD_EVENT_OFF;
2380 s->event->n_enabled_child_sources--;
2381 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2382 return r;
2383 }
2384
2385 break;
2386
2387 case SOURCE_EXIT:
2388 s->enabled = m;
2389 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2390 break;
2391
2392 case SOURCE_DEFER:
2393 case SOURCE_POST:
2394 case SOURCE_INOTIFY:
2395 s->enabled = m;
2396 break;
2397
2398 default:
2399 assert_not_reached("Wut? I shouldn't exist.");
2400 }
2401 }
2402
2403 if (s->pending)
2404 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2405
2406 if (s->prepare)
2407 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2408
2409 return 0;
2410 }
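/*
 * Illustrative sketch (not part of this file) of the three enablement states
 * accepted by sd_event_source_set_enabled(). The timer source is assumed to have
 * been created elsewhere; the helper name is hypothetical.
 */
#include <systemd/sd-event.h>

static int example_pause_then_fire_once(sd_event_source *timer) {
        int r;

        /* A disabled source keeps its configuration but is never dispatched. */
        r = sd_event_source_set_enabled(timer, SD_EVENT_OFF);
        if (r < 0)
                return r;

        /* ONESHOT: dispatch once, then the loop switches the source off again. */
        return sd_event_source_set_enabled(timer, SD_EVENT_ONESHOT);
}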
2411
2412 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2413 assert_return(s, -EINVAL);
2414 assert_return(usec, -EINVAL);
2415 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2416 assert_return(!event_pid_changed(s->event), -ECHILD);
2417
2418 *usec = s->time.next;
2419 return 0;
2420 }
2421
2422 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2423 struct clock_data *d;
2424 int r;
2425
2426 assert_return(s, -EINVAL);
2427 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2428 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2429 assert_return(!event_pid_changed(s->event), -ECHILD);
2430
2431 r = source_set_pending(s, false);
2432 if (r < 0)
2433 return r;
2434
2435 s->time.next = usec;
2436
2437 d = event_get_clock_data(s->event, s->type);
2438 assert(d);
2439
2440 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2441 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2442 d->needs_rearm = true;
2443
2444 return 0;
2445 }
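/*
 * Illustrative sketch (not part of this file): timer sources take absolute times,
 * so re-arming one "five seconds from now" means querying the loop's cached
 * timestamp with sd_event_now() and adding an offset. It assumes the timer source
 * was created on CLOCK_MONOTONIC; the interval and helper name are hypothetical.
 */
#include <time.h>
#include <systemd/sd-event.h>

static int example_rearm_in_five_seconds(sd_event *e, sd_event_source *timer) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        r = sd_event_source_set_time(timer, now_usec + 5U * 1000000U); /* 5s in µs */
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(timer, SD_EVENT_ONESHOT);
}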
2446
2447 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2448 assert_return(s, -EINVAL);
2449 assert_return(usec, -EINVAL);
2450 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2451 assert_return(!event_pid_changed(s->event), -ECHILD);
2452
2453 *usec = s->time.accuracy;
2454 return 0;
2455 }
2456
2457 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2458 struct clock_data *d;
2459 int r;
2460
2461 assert_return(s, -EINVAL);
2462 assert_return(usec != (uint64_t) -1, -EINVAL);
2463 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2464 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2465 assert_return(!event_pid_changed(s->event), -ECHILD);
2466
2467 r = source_set_pending(s, false);
2468 if (r < 0)
2469 return r;
2470
2471 if (usec == 0)
2472 usec = DEFAULT_ACCURACY_USEC;
2473
2474 s->time.accuracy = usec;
2475
2476 d = event_get_clock_data(s->event, s->type);
2477 assert(d);
2478
2479 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2480 d->needs_rearm = true;
2481
2482 return 0;
2483 }
2484
2485 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2486 assert_return(s, -EINVAL);
2487 assert_return(clock, -EINVAL);
2488 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2489 assert_return(!event_pid_changed(s->event), -ECHILD);
2490
2491 *clock = event_source_type_to_clock(s->type);
2492 return 0;
2493 }
2494
2495 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2496 assert_return(s, -EINVAL);
2497 assert_return(pid, -EINVAL);
2498 assert_return(s->type == SOURCE_CHILD, -EDOM);
2499 assert_return(!event_pid_changed(s->event), -ECHILD);
2500
2501 *pid = s->child.pid;
2502 return 0;
2503 }
2504
2505 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2506 assert_return(s, -EINVAL);
2507 assert_return(mask, -EINVAL);
2508 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2509 assert_return(!event_pid_changed(s->event), -ECHILD);
2510
2511 *mask = s->inotify.mask;
2512 return 0;
2513 }
2514
2515 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2516 int r;
2517
2518 assert_return(s, -EINVAL);
2519 assert_return(s->type != SOURCE_EXIT, -EDOM);
2520 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2521 assert_return(!event_pid_changed(s->event), -ECHILD);
2522
2523 if (s->prepare == callback)
2524 return 0;
2525
2526 if (callback && s->prepare) {
2527 s->prepare = callback;
2528 return 0;
2529 }
2530
2531 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2532 if (r < 0)
2533 return r;
2534
2535 s->prepare = callback;
2536
2537 if (callback) {
2538 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2539 if (r < 0)
2540 return r;
2541 } else
2542 prioq_remove(s->event->prepare, s, &s->prepare_index);
2543
2544 return 0;
2545 }
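/*
 * Illustrative sketch (not part of this file): a prepare callback runs right
 * before the loop polls, so it can re-arm a timer source to whatever deadline
 * the application currently has queued. The queue structure and all names are
 * hypothetical.
 */
#include <systemd/sd-event.h>

struct example_jobs {
        sd_event_source *timer;
        uint64_t next_deadline_usec;   /* maintained elsewhere by the application */
};

static int example_jobs_prepare(sd_event_source *s, void *userdata) {
        struct example_jobs *j = userdata;
        int r;

        r = sd_event_source_set_time(j->timer, j->next_deadline_usec);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(j->timer, SD_EVENT_ONESHOT);
}

/* Registered with: sd_event_source_set_prepare(j->timer, example_jobs_prepare); */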
2546
2547 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2548 assert_return(s, NULL);
2549
2550 return s->userdata;
2551 }
2552
2553 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2554 void *ret;
2555
2556 assert_return(s, NULL);
2557
2558 ret = s->userdata;
2559 s->userdata = userdata;
2560
2561 return ret;
2562 }
2563
2564 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2565 usec_t c;
2566 assert(e);
2567 assert(a <= b);
2568
2569 if (a <= 0)
2570 return 0;
2571 if (a >= USEC_INFINITY)
2572 return USEC_INFINITY;
2573
2574 if (b <= a + 1)
2575 return a;
2576
2577 initialize_perturb(e);
2578
2579 /*
2580 Find a good time to wake up again between times a and b. We
2581 have two goals here:
2582
2583 a) We want to wake up as seldom as possible, hence prefer
2584 later times over earlier times.
2585
2586 b) But if we have to wake up, then let's make sure to
2587 dispatch as much as possible on the entire system.
2588
2589 We implement this by waking up everywhere at the same time
2590 within any given minute if we can, synchronised via the
2591 perturbation value determined from the boot ID. If we can't,
2592                 then we try to find the same spot within every 10s, then within
2593                 every 1s and finally within every 250ms interval. Otherwise, we pick the last possible time
2594 to wake up.
2595 */
2596
2597 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2598 if (c >= b) {
2599 if (_unlikely_(c < USEC_PER_MINUTE))
2600 return b;
2601
2602 c -= USEC_PER_MINUTE;
2603 }
2604
2605 if (c >= a)
2606 return c;
2607
2608 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2609 if (c >= b) {
2610 if (_unlikely_(c < USEC_PER_SEC*10))
2611 return b;
2612
2613 c -= USEC_PER_SEC*10;
2614 }
2615
2616 if (c >= a)
2617 return c;
2618
2619 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2620 if (c >= b) {
2621 if (_unlikely_(c < USEC_PER_SEC))
2622 return b;
2623
2624 c -= USEC_PER_SEC;
2625 }
2626
2627 if (c >= a)
2628 return c;
2629
2630 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2631 if (c >= b) {
2632 if (_unlikely_(c < USEC_PER_MSEC*250))
2633 return b;
2634
2635 c -= USEC_PER_MSEC*250;
2636 }
2637
2638 if (c >= a)
2639 return c;
2640
2641 return b;
2642 }
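/*
 * Worked example for the coalescing above (values picked for illustration only).
 * Assume the per-boot perturbation is 13s and a timer may fire anywhere within
 * [00:02:41, 00:02:55]:
 *
 *   minute step: c = 00:02:00 + 13s         = 00:02:13  -> before a, rejected
 *   10s step:    c = 00:02:50 + (13s % 10s) = 00:02:53  -> inside [a, b], returned
 *
 * Every loop on this machine whose window covers 00:02:53 wakes up at that same
 * perturbed boundary instead of picking its own arbitrary time.
 */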
2643
2644 static int event_arm_timer(
2645 sd_event *e,
2646 struct clock_data *d) {
2647
2648 struct itimerspec its = {};
2649 sd_event_source *a, *b;
2650 usec_t t;
2651 int r;
2652
2653 assert(e);
2654 assert(d);
2655
2656 if (!d->needs_rearm)
2657 return 0;
2658 else
2659 d->needs_rearm = false;
2660
2661 a = prioq_peek(d->earliest);
2662 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2663
2664 if (d->fd < 0)
2665 return 0;
2666
2667 if (d->next == USEC_INFINITY)
2668 return 0;
2669
2670 /* disarm */
2671 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2672 if (r < 0)
2673                         return -errno;
2674
2675 d->next = USEC_INFINITY;
2676 return 0;
2677 }
2678
2679 b = prioq_peek(d->latest);
2680 assert_se(b && b->enabled != SD_EVENT_OFF);
2681
2682 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2683 if (d->next == t)
2684 return 0;
2685
2686 assert_se(d->fd >= 0);
2687
2688 if (t == 0) {
2689                 /* We don't want to disarm here, just arm it for some time looooong ago. */
2690 its.it_value.tv_sec = 0;
2691 its.it_value.tv_nsec = 1;
2692 } else
2693 timespec_store(&its.it_value, t);
2694
2695 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2696 if (r < 0)
2697 return -errno;
2698
2699 d->next = t;
2700 return 0;
2701 }
2702
2703 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2704 assert(e);
2705 assert(s);
2706 assert(s->type == SOURCE_IO);
2707
2708 /* If the event source was already pending, we just OR in the
2709 * new revents, otherwise we reset the value. The ORing is
2710 * necessary to handle EPOLLONESHOT events properly where
2711 * readability might happen independently of writability, and
2712 * we need to keep track of both */
2713
2714 if (s->pending)
2715 s->io.revents |= revents;
2716 else
2717 s->io.revents = revents;
2718
2719 return source_set_pending(s, true);
2720 }
2721
2722 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2723 uint64_t x;
2724 ssize_t ss;
2725
2726 assert(e);
2727 assert(fd >= 0);
2728
2729 assert_return(events == EPOLLIN, -EIO);
2730
2731 ss = read(fd, &x, sizeof(x));
2732 if (ss < 0) {
2733 if (IN_SET(errno, EAGAIN, EINTR))
2734 return 0;
2735
2736 return -errno;
2737 }
2738
2739 if (_unlikely_(ss != sizeof(x)))
2740 return -EIO;
2741
2742 if (next)
2743 *next = USEC_INFINITY;
2744
2745 return 0;
2746 }
2747
2748 static int process_timer(
2749 sd_event *e,
2750 usec_t n,
2751 struct clock_data *d) {
2752
2753 sd_event_source *s;
2754 int r;
2755
2756 assert(e);
2757 assert(d);
2758
2759 for (;;) {
2760 s = prioq_peek(d->earliest);
2761 if (!s ||
2762 s->time.next > n ||
2763 s->enabled == SD_EVENT_OFF ||
2764 s->pending)
2765 break;
2766
2767 r = source_set_pending(s, true);
2768 if (r < 0)
2769 return r;
2770
2771 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2772 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2773 d->needs_rearm = true;
2774 }
2775
2776 return 0;
2777 }
2778
2779 static int process_child(sd_event *e) {
2780 sd_event_source *s;
2781 Iterator i;
2782 int r;
2783
2784 assert(e);
2785
2786 e->need_process_child = false;
2787
2788 /*
2789 So, this is ugly. We iteratively invoke waitid() with P_PID
2790 + WNOHANG for each PID we wait for, instead of using
2791 P_ALL. This is because we only want to get child
2792 information of very specific child processes, and not all
2793 of them. We might not have processed the SIGCHLD even of a
2794 previous invocation and we don't want to maintain a
2795 unbounded *per-child* event queue, hence we really don't
2796 want anything flushed out of the kernel's queue that we
2797 don't care about. Since this is O(n) this means that if you
2798 have a lot of processes you probably want to handle SIGCHLD
2799 yourself.
2800
2801 We do not reap the children here (by using WNOWAIT), this
2802 is only done after the event source is dispatched so that
2803 the callback still sees the process as a zombie.
2804 */
2805
2806 HASHMAP_FOREACH(s, e->child_sources, i) {
2807 assert(s->type == SOURCE_CHILD);
2808
2809 if (s->pending)
2810 continue;
2811
2812 if (s->enabled == SD_EVENT_OFF)
2813 continue;
2814
2815 zero(s->child.siginfo);
2816 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2817 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2818 if (r < 0)
2819 return -errno;
2820
2821 if (s->child.siginfo.si_pid != 0) {
2822 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2823
2824 if (!zombie && (s->child.options & WEXITED)) {
2825 /* If the child isn't dead then let's
2826 * immediately remove the state change
2827 * from the queue, since there's no
2828 * benefit in leaving it queued */
2829
2830 assert(s->child.options & (WSTOPPED|WCONTINUED));
2831 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2832 }
2833
2834 r = source_set_pending(s, true);
2835 if (r < 0)
2836 return r;
2837 }
2838 }
2839
2840 return 0;
2841 }
2842
2843 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2844 bool read_one = false;
2845 int r;
2846
2847 assert(e);
2848 assert(d);
2849 assert_return(events == EPOLLIN, -EIO);
2850
2851         /* If there's a signal queued on this priority and SIGCHLD is
2852            on this priority too, then make sure to recheck the
2853            children we watch. This is because we only ever dequeue
2854            the first signal per priority, so a SIGCHLD queued behind
2855            it would go unnoticed, even though we might have children
2856            on this priority we care about. Hence, check for them
2857            explicitly. */
2858
2859 if (sigismember(&d->sigset, SIGCHLD))
2860 e->need_process_child = true;
2861
2862 /* If there's already an event source pending for this
2863 * priority we don't read another */
2864 if (d->current)
2865 return 0;
2866
2867 for (;;) {
2868 struct signalfd_siginfo si;
2869 ssize_t n;
2870 sd_event_source *s = NULL;
2871
2872 n = read(d->fd, &si, sizeof(si));
2873 if (n < 0) {
2874 if (IN_SET(errno, EAGAIN, EINTR))
2875 return read_one;
2876
2877 return -errno;
2878 }
2879
2880 if (_unlikely_(n != sizeof(si)))
2881 return -EIO;
2882
2883 assert(SIGNAL_VALID(si.ssi_signo));
2884
2885 read_one = true;
2886
2887 if (e->signal_sources)
2888 s = e->signal_sources[si.ssi_signo];
2889 if (!s)
2890 continue;
2891 if (s->pending)
2892 continue;
2893
2894 s->signal.siginfo = si;
2895 d->current = s;
2896
2897 r = source_set_pending(s, true);
2898 if (r < 0)
2899 return r;
2900
2901 return 1;
2902 }
2903 }
2904
2905 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2906 ssize_t n;
2907
2908 assert(e);
2909 assert(d);
2910
2911 assert_return(revents == EPOLLIN, -EIO);
2912
2913 /* If there's already an event source pending for this priority, don't read another */
2914 if (d->n_pending > 0)
2915 return 0;
2916
2917 /* Is the read buffer non-empty? If so, let's not read more */
2918 if (d->buffer_filled > 0)
2919 return 0;
2920
2921 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2922 if (n < 0) {
2923 if (IN_SET(errno, EAGAIN, EINTR))
2924 return 0;
2925
2926 return -errno;
2927 }
2928
2929 assert(n > 0);
2930 d->buffer_filled = (size_t) n;
2931 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2932
2933 return 1;
2934 }
2935
2936 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2937 assert(e);
2938 assert(d);
2939 assert(sz <= d->buffer_filled);
2940
2941 if (sz == 0)
2942 return;
2943
2944         /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2945 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2946 d->buffer_filled -= sz;
2947
2948 if (d->buffer_filled == 0)
2949 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2950 }
2951
2952 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2953 int r;
2954
2955 assert(e);
2956 assert(d);
2957
2958 /* If there's already an event source pending for this priority, don't read another */
2959 if (d->n_pending > 0)
2960 return 0;
2961
2962 while (d->buffer_filled > 0) {
2963 size_t sz;
2964
2965 /* Let's validate that the event structures are complete */
2966 if (d->buffer_filled < offsetof(struct inotify_event, name))
2967 return -EIO;
2968
2969 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2970 if (d->buffer_filled < sz)
2971 return -EIO;
2972
2973 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2974 struct inode_data *inode_data;
2975 Iterator i;
2976
2977 /* The queue overran, let's pass this event to all event sources connected to this inotify
2978 * object */
2979
2980 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2981 sd_event_source *s;
2982
2983 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2984
2985 if (s->enabled == SD_EVENT_OFF)
2986 continue;
2987
2988 r = source_set_pending(s, true);
2989 if (r < 0)
2990 return r;
2991 }
2992 }
2993 } else {
2994 struct inode_data *inode_data;
2995 sd_event_source *s;
2996
2997 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2998 * our watch descriptor table. */
2999 if (d->buffer.ev.mask & IN_IGNORED) {
3000
3001 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3002 if (!inode_data) {
3003 event_inotify_data_drop(e, d, sz);
3004 continue;
3005 }
3006
3007 /* The watch descriptor was removed by the kernel, let's drop it here too */
3008 inode_data->wd = -1;
3009 } else {
3010 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
3011 if (!inode_data) {
3012 event_inotify_data_drop(e, d, sz);
3013 continue;
3014 }
3015 }
3016
3017 /* Trigger all event sources that are interested in these events. Also trigger all event
3018 * sources if IN_IGNORED or IN_UNMOUNT is set. */
3019 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
3020
3021 if (s->enabled == SD_EVENT_OFF)
3022 continue;
3023
3024 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
3025 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
3026 continue;
3027
3028 r = source_set_pending(s, true);
3029 if (r < 0)
3030 return r;
3031 }
3032 }
3033
3034 /* Something pending now? If so, let's finish, otherwise let's read more. */
3035 if (d->n_pending > 0)
3036 return 1;
3037 }
3038
3039 return 0;
3040 }
3041
3042 static int process_inotify(sd_event *e) {
3043 struct inotify_data *d;
3044 int r, done = 0;
3045
3046 assert(e);
3047
3048 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
3049 r = event_inotify_data_process(e, d);
3050 if (r < 0)
3051 return r;
3052 if (r > 0)
3053                         done++;
3054 }
3055
3056 return done;
3057 }
3058
3059 static int source_dispatch(sd_event_source *s) {
3060 EventSourceType saved_type;
3061 int r = 0;
3062
3063 assert(s);
3064 assert(s->pending || s->type == SOURCE_EXIT);
3065
3066         /* Save the event source type here, so that we still know it after the event callback, which might invalidate
3067          * the event source. */
3068 saved_type = s->type;
3069
3070 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
3071 r = source_set_pending(s, false);
3072 if (r < 0)
3073 return r;
3074 }
3075
3076 if (s->type != SOURCE_POST) {
3077 sd_event_source *z;
3078 Iterator i;
3079
3080 /* If we execute a non-post source, let's mark all
3081 * post sources as pending */
3082
3083 SET_FOREACH(z, s->event->post_sources, i) {
3084 if (z->enabled == SD_EVENT_OFF)
3085 continue;
3086
3087 r = source_set_pending(z, true);
3088 if (r < 0)
3089 return r;
3090 }
3091 }
3092
3093 if (s->enabled == SD_EVENT_ONESHOT) {
3094 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
3095 if (r < 0)
3096 return r;
3097 }
3098
3099 s->dispatching = true;
3100
3101 switch (s->type) {
3102
3103 case SOURCE_IO:
3104 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
3105 break;
3106
3107 case SOURCE_TIME_REALTIME:
3108 case SOURCE_TIME_BOOTTIME:
3109 case SOURCE_TIME_MONOTONIC:
3110 case SOURCE_TIME_REALTIME_ALARM:
3111 case SOURCE_TIME_BOOTTIME_ALARM:
3112 r = s->time.callback(s, s->time.next, s->userdata);
3113 break;
3114
3115 case SOURCE_SIGNAL:
3116 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
3117 break;
3118
3119 case SOURCE_CHILD: {
3120 bool zombie;
3121
3122 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
3123
3124 r = s->child.callback(s, &s->child.siginfo, s->userdata);
3125
3126 /* Now, reap the PID for good. */
3127 if (zombie)
3128 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
3129
3130 break;
3131 }
3132
3133 case SOURCE_DEFER:
3134 r = s->defer.callback(s, s->userdata);
3135 break;
3136
3137 case SOURCE_POST:
3138 r = s->post.callback(s, s->userdata);
3139 break;
3140
3141 case SOURCE_EXIT:
3142 r = s->exit.callback(s, s->userdata);
3143 break;
3144
3145 case SOURCE_INOTIFY: {
3146 struct sd_event *e = s->event;
3147 struct inotify_data *d;
3148 size_t sz;
3149
3150 assert(s->inotify.inode_data);
3151 assert_se(d = s->inotify.inode_data->inotify_data);
3152
3153 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
3154 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
3155 assert(d->buffer_filled >= sz);
3156
3157 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
3158
3159 /* When no event is pending anymore on this inotify object, then let's drop the event from the
3160 * buffer. */
3161 if (d->n_pending == 0)
3162 event_inotify_data_drop(e, d, sz);
3163
3164 break;
3165 }
3166
3167 case SOURCE_WATCHDOG:
3168 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
3169 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
3170 assert_not_reached("Wut? I shouldn't exist.");
3171 }
3172
3173 s->dispatching = false;
3174
3175 if (r < 0)
3176 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
3177 strna(s->description), event_source_type_to_string(saved_type));
3178
3179 if (s->n_ref == 0)
3180 source_free(s);
3181 else if (r < 0)
3182 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3183
3184 return 1;
3185 }
3186
3187 static int event_prepare(sd_event *e) {
3188 int r;
3189
3190 assert(e);
3191
3192 for (;;) {
3193 sd_event_source *s;
3194
3195 s = prioq_peek(e->prepare);
3196 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
3197 break;
3198
3199 s->prepare_iteration = e->iteration;
3200 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
3201 if (r < 0)
3202 return r;
3203
3204 assert(s->prepare);
3205
3206 s->dispatching = true;
3207 r = s->prepare(s, s->userdata);
3208 s->dispatching = false;
3209
3210 if (r < 0)
3211 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
3212 strna(s->description), event_source_type_to_string(s->type));
3213
3214 if (s->n_ref == 0)
3215 source_free(s);
3216 else if (r < 0)
3217 sd_event_source_set_enabled(s, SD_EVENT_OFF);
3218 }
3219
3220 return 0;
3221 }
3222
3223 static int dispatch_exit(sd_event *e) {
3224 sd_event_source *p;
3225 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3226 int r;
3227
3228 assert(e);
3229
3230 p = prioq_peek(e->exit);
3231 if (!p || p->enabled == SD_EVENT_OFF) {
3232 e->state = SD_EVENT_FINISHED;
3233 return 0;
3234 }
3235
3236 ref = sd_event_ref(e);
3237 e->iteration++;
3238 e->state = SD_EVENT_EXITING;
3239 r = source_dispatch(p);
3240 e->state = SD_EVENT_INITIAL;
3241 return r;
3242 }
3243
3244 static sd_event_source* event_next_pending(sd_event *e) {
3245 sd_event_source *p;
3246
3247 assert(e);
3248
3249 p = prioq_peek(e->pending);
3250 if (!p)
3251 return NULL;
3252
3253 if (p->enabled == SD_EVENT_OFF)
3254 return NULL;
3255
3256 return p;
3257 }
3258
3259 static int arm_watchdog(sd_event *e) {
3260 struct itimerspec its = {};
3261 usec_t t;
3262 int r;
3263
3264 assert(e);
3265 assert(e->watchdog_fd >= 0);
3266
3267 t = sleep_between(e,
3268 e->watchdog_last + (e->watchdog_period / 2),
3269 e->watchdog_last + (e->watchdog_period * 3 / 4));
3270
3271 timespec_store(&its.it_value, t);
3272
3273 /* Make sure we never set the watchdog to 0, which tells the
3274 * kernel to disable it. */
3275 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3276 its.it_value.tv_nsec = 1;
3277
3278 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3279 if (r < 0)
3280 return -errno;
3281
3282 return 0;
3283 }
3284
3285 static int process_watchdog(sd_event *e) {
3286 assert(e);
3287
3288 if (!e->watchdog)
3289 return 0;
3290
3291 /* Don't notify watchdog too often */
3292 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3293 return 0;
3294
3295 sd_notify(false, "WATCHDOG=1");
3296 e->watchdog_last = e->timestamp.monotonic;
3297
3298 return arm_watchdog(e);
3299 }
3300
3301 static void event_close_inode_data_fds(sd_event *e) {
3302 struct inode_data *d;
3303
3304 assert(e);
3305
3306 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3307          * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3308          * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3309 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3310 * compromise. */
3311
3312 while ((d = e->inode_data_to_close)) {
3313 assert(d->fd >= 0);
3314 d->fd = safe_close(d->fd);
3315
3316 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3317 }
3318 }
3319
3320 _public_ int sd_event_prepare(sd_event *e) {
3321 int r;
3322
3323 assert_return(e, -EINVAL);
3324 assert_return(e = event_resolve(e), -ENOPKG);
3325 assert_return(!event_pid_changed(e), -ECHILD);
3326 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3327 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3328
3329 if (e->exit_requested)
3330 goto pending;
3331
3332 e->iteration++;
3333
3334 e->state = SD_EVENT_PREPARING;
3335 r = event_prepare(e);
3336 e->state = SD_EVENT_INITIAL;
3337 if (r < 0)
3338 return r;
3339
3340 r = event_arm_timer(e, &e->realtime);
3341 if (r < 0)
3342 return r;
3343
3344 r = event_arm_timer(e, &e->boottime);
3345 if (r < 0)
3346 return r;
3347
3348 r = event_arm_timer(e, &e->monotonic);
3349 if (r < 0)
3350 return r;
3351
3352 r = event_arm_timer(e, &e->realtime_alarm);
3353 if (r < 0)
3354 return r;
3355
3356 r = event_arm_timer(e, &e->boottime_alarm);
3357 if (r < 0)
3358 return r;
3359
3360 event_close_inode_data_fds(e);
3361
3362 if (event_next_pending(e) || e->need_process_child)
3363 goto pending;
3364
3365 e->state = SD_EVENT_ARMED;
3366
3367 return 0;
3368
3369 pending:
3370 e->state = SD_EVENT_ARMED;
3371 r = sd_event_wait(e, 0);
3372 if (r == 0)
3373 e->state = SD_EVENT_ARMED;
3374
3375 return r;
3376 }
3377
3378 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3379 struct epoll_event *ev_queue;
3380 unsigned ev_queue_max;
3381 int r, m, i;
3382
3383 assert_return(e, -EINVAL);
3384 assert_return(e = event_resolve(e), -ENOPKG);
3385 assert_return(!event_pid_changed(e), -ECHILD);
3386 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3387 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3388
3389 if (e->exit_requested) {
3390 e->state = SD_EVENT_PENDING;
3391 return 1;
3392 }
3393
3394 ev_queue_max = MAX(e->n_sources, 1u);
3395 ev_queue = newa(struct epoll_event, ev_queue_max);
3396
3397 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3398 if (e->inotify_data_buffered)
3399 timeout = 0;
3400
3401 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3402 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3403 if (m < 0) {
3404 if (errno == EINTR) {
3405 e->state = SD_EVENT_PENDING;
3406 return 1;
3407 }
3408
3409 r = -errno;
3410 goto finish;
3411 }
3412
3413 triple_timestamp_get(&e->timestamp);
3414
3415 for (i = 0; i < m; i++) {
3416
3417 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3418 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3419 else {
3420 WakeupType *t = ev_queue[i].data.ptr;
3421
3422 switch (*t) {
3423
3424 case WAKEUP_EVENT_SOURCE:
3425 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3426 break;
3427
3428 case WAKEUP_CLOCK_DATA: {
3429 struct clock_data *d = ev_queue[i].data.ptr;
3430 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3431 break;
3432 }
3433
3434 case WAKEUP_SIGNAL_DATA:
3435 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3436 break;
3437
3438 case WAKEUP_INOTIFY_DATA:
3439 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3440 break;
3441
3442 default:
3443 assert_not_reached("Invalid wake-up pointer");
3444 }
3445 }
3446 if (r < 0)
3447 goto finish;
3448 }
3449
3450 r = process_watchdog(e);
3451 if (r < 0)
3452 goto finish;
3453
3454 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3455 if (r < 0)
3456 goto finish;
3457
3458 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3459 if (r < 0)
3460 goto finish;
3461
3462 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3463 if (r < 0)
3464 goto finish;
3465
3466 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3467 if (r < 0)
3468 goto finish;
3469
3470 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3471 if (r < 0)
3472 goto finish;
3473
3474 if (e->need_process_child) {
3475 r = process_child(e);
3476 if (r < 0)
3477 goto finish;
3478 }
3479
3480 r = process_inotify(e);
3481 if (r < 0)
3482 goto finish;
3483
3484 if (event_next_pending(e)) {
3485 e->state = SD_EVENT_PENDING;
3486
3487 return 1;
3488 }
3489
3490 r = 0;
3491
3492 finish:
3493 e->state = SD_EVENT_INITIAL;
3494
3495 return r;
3496 }
3497
3498 _public_ int sd_event_dispatch(sd_event *e) {
3499 sd_event_source *p;
3500 int r;
3501
3502 assert_return(e, -EINVAL);
3503 assert_return(e = event_resolve(e), -ENOPKG);
3504 assert_return(!event_pid_changed(e), -ECHILD);
3505 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3506 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3507
3508 if (e->exit_requested)
3509 return dispatch_exit(e);
3510
3511 p = event_next_pending(e);
3512 if (p) {
3513 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3514
3515 ref = sd_event_ref(e);
3516 e->state = SD_EVENT_RUNNING;
3517 r = source_dispatch(p);
3518 e->state = SD_EVENT_INITIAL;
3519 return r;
3520 }
3521
3522 e->state = SD_EVENT_INITIAL;
3523
3524 return 1;
3525 }
3526
3527 static void event_log_delays(sd_event *e) {
3528 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3529 unsigned i;
3530 int o;
3531
3532 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3533 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3534 e->delays[i] = 0;
3535 }
3536 log_debug("Event loop iterations: %.*s", o, b);
3537 }
3538
3539 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3540 int r;
3541
3542 assert_return(e, -EINVAL);
3543 assert_return(e = event_resolve(e), -ENOPKG);
3544 assert_return(!event_pid_changed(e), -ECHILD);
3545 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3546 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3547
3548 if (e->profile_delays && e->last_run) {
3549 usec_t this_run;
3550 unsigned l;
3551
3552 this_run = now(CLOCK_MONOTONIC);
3553
3554 l = u64log2(this_run - e->last_run);
3555 assert(l < sizeof(e->delays));
3556 e->delays[l]++;
3557
3558 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3559 event_log_delays(e);
3560 e->last_log = this_run;
3561 }
3562 }
3563
3564 r = sd_event_prepare(e);
3565 if (r == 0)
3566 /* There was nothing? Then wait... */
3567 r = sd_event_wait(e, timeout);
3568
3569 if (e->profile_delays)
3570 e->last_run = now(CLOCK_MONOTONIC);
3571
3572 if (r > 0) {
3573 /* There's something now, then let's dispatch it */
3574 r = sd_event_dispatch(e);
3575 if (r < 0)
3576 return r;
3577
3578 return 1;
3579 }
3580
3581 return r;
3582 }
3583
3584 _public_ int sd_event_loop(sd_event *e) {
3585 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3586 int r;
3587
3588 assert_return(e, -EINVAL);
3589 assert_return(e = event_resolve(e), -ENOPKG);
3590 assert_return(!event_pid_changed(e), -ECHILD);
3591 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3592
3593 ref = sd_event_ref(e);
3594
3595 while (e->state != SD_EVENT_FINISHED) {
3596 r = sd_event_run(e, (uint64_t) -1);
3597 if (r < 0)
3598 return r;
3599 }
3600
3601 return e->exit_code;
3602 }
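/*
 * Illustrative sketch (not part of this file) of a minimal client of this API: run
 * the default event loop until SIGINT arrives, then leave via sd_event_exit(). All
 * names other than the libc and sd-event calls are hypothetical.
 */
#include <signal.h>
#include <systemd/sd-event.h>

static int example_on_sigint(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        /* Request that exit sources run and that sd_event_loop() returns. */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        sd_event *e = NULL;
        sigset_t mask;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        /* Signals handled through the loop must be blocked first. */
        sigemptyset(&mask);
        sigaddset(&mask, SIGINT);
        (void) sigprocmask(SIG_BLOCK, &mask, NULL);

        /* Passing NULL as the source pointer makes the source "floating", i.e.
         * owned by (and freed together with) the event loop. */
        r = sd_event_add_signal(e, NULL, SIGINT, example_on_sigint, NULL);
        if (r >= 0)
                r = sd_event_loop(e);   /* dispatches sources until sd_event_exit() */

        sd_event_unref(e);
        return r < 0 ? 1 : 0;
}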
3603
3604 _public_ int sd_event_get_fd(sd_event *e) {
3605
3606 assert_return(e, -EINVAL);
3607 assert_return(e = event_resolve(e), -ENOPKG);
3608 assert_return(!event_pid_changed(e), -ECHILD);
3609
3610 return e->epoll_fd;
3611 }
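/*
 * Illustrative sketch (not part of this file): embedding this loop in a foreign
 * poll() loop via sd_event_get_fd(). Wait until the loop's fd becomes readable,
 * then let sd-event run one non-blocking iteration. The helper name is
 * hypothetical.
 */
#include <poll.h>
#include <systemd/sd-event.h>

static int example_drive_from_external_loop(sd_event *e) {
        struct pollfd p = {
                .fd = sd_event_get_fd(e),
                .events = POLLIN,
        };
        int r;

        r = poll(&p, 1, -1);     /* or hand p.fd to any other polling mechanism */
        if (r < 0)
                return -1;

        /* Timeout 0: dispatch whatever is ready without blocking again. */
        return sd_event_run(e, 0);
}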
3612
3613 _public_ int sd_event_get_state(sd_event *e) {
3614 assert_return(e, -EINVAL);
3615 assert_return(e = event_resolve(e), -ENOPKG);
3616 assert_return(!event_pid_changed(e), -ECHILD);
3617
3618 return e->state;
3619 }
3620
3621 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3622 assert_return(e, -EINVAL);
3623 assert_return(e = event_resolve(e), -ENOPKG);
3624 assert_return(code, -EINVAL);
3625 assert_return(!event_pid_changed(e), -ECHILD);
3626
3627 if (!e->exit_requested)
3628 return -ENODATA;
3629
3630 *code = e->exit_code;
3631 return 0;
3632 }
3633
3634 _public_ int sd_event_exit(sd_event *e, int code) {
3635 assert_return(e, -EINVAL);
3636 assert_return(e = event_resolve(e), -ENOPKG);
3637 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3638 assert_return(!event_pid_changed(e), -ECHILD);
3639
3640 e->exit_requested = true;
3641 e->exit_code = code;
3642
3643 return 0;
3644 }
3645
3646 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3647 assert_return(e, -EINVAL);
3648 assert_return(e = event_resolve(e), -ENOPKG);
3649 assert_return(usec, -EINVAL);
3650 assert_return(!event_pid_changed(e), -ECHILD);
3651
3652 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3653 return -EOPNOTSUPP;
3654
3655         /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3656          * for a reason: there are systems where CLOCK_BOOTTIME is supported but CLOCK_BOOTTIME_ALARM is not, yet for
3657 * the purpose of getting the time this doesn't matter. */
3658 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3659 return -EOPNOTSUPP;
3660
3661 if (!triple_timestamp_is_set(&e->timestamp)) {
3662 /* Implicitly fall back to now() if we never ran
3663 * before and thus have no cached time. */
3664 *usec = now(clock);
3665 return 1;
3666 }
3667
3668 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3669 return 0;
3670 }
3671
3672 _public_ int sd_event_default(sd_event **ret) {
3673 sd_event *e = NULL;
3674 int r;
3675
3676 if (!ret)
3677 return !!default_event;
3678
3679 if (default_event) {
3680 *ret = sd_event_ref(default_event);
3681 return 0;
3682 }
3683
3684 r = sd_event_new(&e);
3685 if (r < 0)
3686 return r;
3687
3688 e->default_event_ptr = &default_event;
3689 e->tid = gettid();
3690 default_event = e;
3691
3692 *ret = e;
3693 return 1;
3694 }
3695
3696 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3697 assert_return(e, -EINVAL);
3698 assert_return(e = event_resolve(e), -ENOPKG);
3699 assert_return(tid, -EINVAL);
3700 assert_return(!event_pid_changed(e), -ECHILD);
3701
3702 if (e->tid != 0) {
3703 *tid = e->tid;
3704 return 0;
3705 }
3706
3707 return -ENXIO;
3708 }
3709
3710 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3711 int r;
3712
3713 assert_return(e, -EINVAL);
3714 assert_return(e = event_resolve(e), -ENOPKG);
3715 assert_return(!event_pid_changed(e), -ECHILD);
3716
3717 if (e->watchdog == !!b)
3718 return e->watchdog;
3719
3720 if (b) {
3721 struct epoll_event ev;
3722
3723 r = sd_watchdog_enabled(false, &e->watchdog_period);
3724 if (r <= 0)
3725 return r;
3726
3727 /* Issue first ping immediately */
3728 sd_notify(false, "WATCHDOG=1");
3729 e->watchdog_last = now(CLOCK_MONOTONIC);
3730
3731 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3732 if (e->watchdog_fd < 0)
3733 return -errno;
3734
3735 r = arm_watchdog(e);
3736 if (r < 0)
3737 goto fail;
3738
3739 ev = (struct epoll_event) {
3740 .events = EPOLLIN,
3741 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3742 };
3743
3744 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3745 if (r < 0) {
3746 r = -errno;
3747 goto fail;
3748 }
3749
3750 } else {
3751 if (e->watchdog_fd >= 0) {
3752 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3753 e->watchdog_fd = safe_close(e->watchdog_fd);
3754 }
3755 }
3756
3757 e->watchdog = !!b;
3758 return e->watchdog;
3759
3760 fail:
3761 e->watchdog_fd = safe_close(e->watchdog_fd);
3762 return r;
3763 }
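/*
 * Illustrative sketch (not part of this file): opting into the watchdog logic
 * above from a service. sd_event_set_watchdog() only returns a positive value if
 * the service manager actually configured a watchdog (WATCHDOG_USEC); the helper
 * name is hypothetical.
 */
#include <systemd/sd-event.h>

static int example_enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, 1);
        if (r < 0)
                return r;   /* enabling it failed */
        if (r == 0)
                return 0;   /* no watchdog requested for this service */

        /* From here on the loop pings the manager automatically between iterations. */
        return 1;
}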
3764
3765 _public_ int sd_event_get_watchdog(sd_event *e) {
3766 assert_return(e, -EINVAL);
3767 assert_return(e = event_resolve(e), -ENOPKG);
3768 assert_return(!event_pid_changed(e), -ECHILD);
3769
3770 return e->watchdog;
3771 }
3772
3773 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3774 assert_return(e, -EINVAL);
3775 assert_return(e = event_resolve(e), -ENOPKG);
3776 assert_return(!event_pid_changed(e), -ECHILD);
3777
3778 *ret = e->iteration;
3779 return 0;
3780 }
3781
3782 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3783 assert_return(s, -EINVAL);
3784
3785 s->destroy_callback = callback;
3786 return 0;
3787 }
3788
3789 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3790 assert_return(s, -EINVAL);
3791
3792 if (ret)
3793 *ret = s->destroy_callback;
3794
3795 return !!s->destroy_callback;
3796 }
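/*
 * Illustrative sketch (not part of this file): a destroy callback registered via
 * sd_event_source_set_destroy_callback() runs when the event source is freed,
 * which makes it a natural place to release heap-allocated userdata. The context
 * structure and helper are hypothetical.
 */
#include <stdlib.h>
#include <systemd/sd-event.h>

struct example_ctx {
        char *name;
};

static void example_ctx_destroy(void *userdata) {
        struct example_ctx *c = userdata;

        if (!c)
                return;

        free(c->name);
        free(c);
}

static int example_attach_owned_userdata(sd_event_source *s, struct example_ctx *c) {
        /* Hand ownership of 'c' to the source: it is released together with it. */
        sd_event_source_set_userdata(s, c);
        return sd_event_source_set_destroy_callback(s, example_ctx_destroy);
}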