1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "event-source.h"
13 #include "fd-util.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "list.h"
17 #include "macro.h"
18 #include "missing.h"
19 #include "prioq.h"
20 #include "process-util.h"
21 #include "set.h"
22 #include "signal-util.h"
23 #include "string-table.h"
24 #include "string-util.h"
25 #include "time-util.h"
26 #include "util.h"
27
28 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
29
30 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
31 [SOURCE_IO] = "io",
32 [SOURCE_TIME_REALTIME] = "realtime",
33 [SOURCE_TIME_BOOTTIME] = "boottime",
34 [SOURCE_TIME_MONOTONIC] = "monotonic",
35 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
36 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
37 [SOURCE_SIGNAL] = "signal",
38 [SOURCE_CHILD] = "child",
39 [SOURCE_DEFER] = "defer",
40 [SOURCE_POST] = "post",
41 [SOURCE_EXIT] = "exit",
42 [SOURCE_WATCHDOG] = "watchdog",
43 [SOURCE_INOTIFY] = "inotify",
44 };
45
46 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
47
48 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
49
50 struct sd_event {
51 unsigned n_ref;
52
53 int epoll_fd;
54 int watchdog_fd;
55
56 Prioq *pending;
57 Prioq *prepare;
58
59 /* timerfd_create() only supports these five clocks so far. We
60 * can add support for more clocks when the kernel learns to
61 * deal with them, too. */
62 struct clock_data realtime;
63 struct clock_data boottime;
64 struct clock_data monotonic;
65 struct clock_data realtime_alarm;
66 struct clock_data boottime_alarm;
67
68 usec_t perturb;
69
70 sd_event_source **signal_sources; /* indexed by signal number */
71 Hashmap *signal_data; /* indexed by priority */
72
73 Hashmap *child_sources;
74 unsigned n_enabled_child_sources;
75
76 Set *post_sources;
77
78 Prioq *exit;
79
80 Hashmap *inotify_data; /* indexed by priority */
81
82 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
83 LIST_HEAD(struct inode_data, inode_data_to_close);
84
85 /* A list of inotify objects that already have events buffered which aren't processed yet */
86 LIST_HEAD(struct inotify_data, inotify_data_buffered);
87
88 pid_t original_pid;
89
90 uint64_t iteration;
91 triple_timestamp timestamp;
92 int state;
93
94 bool exit_requested:1;
95 bool need_process_child:1;
96 bool watchdog:1;
97 bool profile_delays:1;
98
99 int exit_code;
100
101 pid_t tid;
102 sd_event **default_event_ptr;
103
104 usec_t watchdog_last, watchdog_period;
105
106 unsigned n_sources;
107
108 LIST_HEAD(sd_event_source, sources);
109
110 usec_t last_run, last_log;
111 unsigned delays[sizeof(usec_t) * 8];
112 };
113
114 static thread_local sd_event *default_event = NULL;
115
116 static void source_disconnect(sd_event_source *s);
117 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
118
119 static sd_event *event_resolve(sd_event *e) {
120 return e == SD_EVENT_DEFAULT ? default_event : e;
121 }
122
123 static int pending_prioq_compare(const void *a, const void *b) {
124 const sd_event_source *x = a, *y = b;
125 int r;
126
127 assert(x->pending);
128 assert(y->pending);
129
130 /* Enabled ones first */
131 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
132 return -1;
133 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
134 return 1;
135
136 /* Lower priority values first */
137 r = CMP(x->priority, y->priority);
138 if (r != 0)
139 return r;
140
141 /* Older entries first */
142 return CMP(x->pending_iteration, y->pending_iteration);
143 }
144
145 static int prepare_prioq_compare(const void *a, const void *b) {
146 const sd_event_source *x = a, *y = b;
147 int r;
148
149 assert(x->prepare);
150 assert(y->prepare);
151
152 /* Enabled ones first */
153 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
154 return -1;
155 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
156 return 1;
157
158 /* Move most recently prepared ones last, so that we can stop
159 * preparing as soon as we hit one that has already been
160 * prepared in the current iteration */
161 r = CMP(x->prepare_iteration, y->prepare_iteration);
162 if (r != 0)
163 return r;
164
165 /* Lower priority values first */
166 return CMP(x->priority, y->priority);
167 }
168
169 static int earliest_time_prioq_compare(const void *a, const void *b) {
170 const sd_event_source *x = a, *y = b;
171
172 assert(EVENT_SOURCE_IS_TIME(x->type));
173 assert(x->type == y->type);
174
175 /* Enabled ones first */
176 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
177 return -1;
178 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
179 return 1;
180
181 /* Move the pending ones to the end */
182 if (!x->pending && y->pending)
183 return -1;
184 if (x->pending && !y->pending)
185 return 1;
186
187 /* Order by time */
188 return CMP(x->time.next, y->time.next);
189 }
190
191 static usec_t time_event_source_latest(const sd_event_source *s) {
192 return usec_add(s->time.next, s->time.accuracy);
193 }
194
195 static int latest_time_prioq_compare(const void *a, const void *b) {
196 const sd_event_source *x = a, *y = b;
197
198 assert(EVENT_SOURCE_IS_TIME(x->type));
199 assert(x->type == y->type);
200
201 /* Enabled ones first */
202 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203 return -1;
204 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205 return 1;
206
207 /* Move the pending ones to the end */
208 if (!x->pending && y->pending)
209 return -1;
210 if (x->pending && !y->pending)
211 return 1;
212
213 /* Order by time */
214 return CMP(time_event_source_latest(x), time_event_source_latest(y));
215 }
216
217 static int exit_prioq_compare(const void *a, const void *b) {
218 const sd_event_source *x = a, *y = b;
219
220 assert(x->type == SOURCE_EXIT);
221 assert(y->type == SOURCE_EXIT);
222
223 /* Enabled ones first */
224 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
225 return -1;
226 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 return 1;
228
229 /* Lower priority values first */
230 return CMP(x->priority, y->priority);
231 }
232
233 static void free_clock_data(struct clock_data *d) {
234 assert(d);
235 assert(d->wakeup == WAKEUP_CLOCK_DATA);
236
237 safe_close(d->fd);
238 prioq_free(d->earliest);
239 prioq_free(d->latest);
240 }
241
242 static sd_event *event_free(sd_event *e) {
243 sd_event_source *s;
244
245 assert(e);
246
247 while ((s = e->sources)) {
248 assert(s->floating);
249 source_disconnect(s);
250 sd_event_source_unref(s);
251 }
252
253 assert(e->n_sources == 0);
254
255 if (e->default_event_ptr)
256 *(e->default_event_ptr) = NULL;
257
258 safe_close(e->epoll_fd);
259 safe_close(e->watchdog_fd);
260
261 free_clock_data(&e->realtime);
262 free_clock_data(&e->boottime);
263 free_clock_data(&e->monotonic);
264 free_clock_data(&e->realtime_alarm);
265 free_clock_data(&e->boottime_alarm);
266
267 prioq_free(e->pending);
268 prioq_free(e->prepare);
269 prioq_free(e->exit);
270
271 free(e->signal_sources);
272 hashmap_free(e->signal_data);
273
274 hashmap_free(e->inotify_data);
275
276 hashmap_free(e->child_sources);
277 set_free(e->post_sources);
278
279 return mfree(e);
280 }
281
282 _public_ int sd_event_new(sd_event** ret) {
283 sd_event *e;
284 int r;
285
286 assert_return(ret, -EINVAL);
287
288 e = new(sd_event, 1);
289 if (!e)
290 return -ENOMEM;
291
292 *e = (sd_event) {
293 .n_ref = 1,
294 .epoll_fd = -1,
295 .watchdog_fd = -1,
296 .realtime.wakeup = WAKEUP_CLOCK_DATA,
297 .realtime.fd = -1,
298 .realtime.next = USEC_INFINITY,
299 .boottime.wakeup = WAKEUP_CLOCK_DATA,
300 .boottime.fd = -1,
301 .boottime.next = USEC_INFINITY,
302 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
303 .monotonic.fd = -1,
304 .monotonic.next = USEC_INFINITY,
305 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
306 .realtime_alarm.fd = -1,
307 .realtime_alarm.next = USEC_INFINITY,
308 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
309 .boottime_alarm.fd = -1,
310 .boottime_alarm.next = USEC_INFINITY,
311 .perturb = USEC_INFINITY,
312 .original_pid = getpid_cached(),
313 };
314
315 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
316 if (r < 0)
317 goto fail;
318
319 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
320 if (e->epoll_fd < 0) {
321 r = -errno;
322 goto fail;
323 }
324
325 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
326
327 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
328 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
329 e->profile_delays = true;
330 }
331
332 *ret = e;
333 return 0;
334
335 fail:
336 event_free(e);
337 return r;
338 }
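/* Example (not part of this file): a minimal consumer of the constructor above. A
 * caller allocates a loop with sd_event_new() (or shares the per-thread instance via
 * sd_event_default()), attaches sources, runs it, and drops the reference. Error
 * handling is abbreviated; "my_io_handler" and "some_fd" are hypothetical.
 *
 *     sd_event *e = NULL;
 *     int r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_io(e, NULL, some_fd, EPOLLIN, my_io_handler, NULL);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_loop(e);           dispatches until sd_event_exit() is called
 *     sd_event_unref(e);
 */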
339
340 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
341
342 static bool event_pid_changed(sd_event *e) {
343 assert(e);
344
345 /* We don't support people creating an event loop and keeping
346 * it around over a fork(). Let's complain. */
347
348 return e->original_pid != getpid_cached();
349 }
350
351 static void source_io_unregister(sd_event_source *s) {
352 int r;
353
354 assert(s);
355 assert(s->type == SOURCE_IO);
356
357 if (event_pid_changed(s->event))
358 return;
359
360 if (!s->io.registered)
361 return;
362
363 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
364 if (r < 0)
365 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
366 strna(s->description), event_source_type_to_string(s->type));
367
368 s->io.registered = false;
369 }
370
371 static int source_io_register(
372 sd_event_source *s,
373 int enabled,
374 uint32_t events) {
375
376 struct epoll_event ev;
377 int r;
378
379 assert(s);
380 assert(s->type == SOURCE_IO);
381 assert(enabled != SD_EVENT_OFF);
382
383 ev = (struct epoll_event) {
384 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
385 .data.ptr = s,
386 };
387
388 if (s->io.registered)
389 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
390 else
391 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
392 if (r < 0)
393 return -errno;
394
395 s->io.registered = true;
396
397 return 0;
398 }
399
400 static clockid_t event_source_type_to_clock(EventSourceType t) {
401
402 switch (t) {
403
404 case SOURCE_TIME_REALTIME:
405 return CLOCK_REALTIME;
406
407 case SOURCE_TIME_BOOTTIME:
408 return CLOCK_BOOTTIME;
409
410 case SOURCE_TIME_MONOTONIC:
411 return CLOCK_MONOTONIC;
412
413 case SOURCE_TIME_REALTIME_ALARM:
414 return CLOCK_REALTIME_ALARM;
415
416 case SOURCE_TIME_BOOTTIME_ALARM:
417 return CLOCK_BOOTTIME_ALARM;
418
419 default:
420 return (clockid_t) -1;
421 }
422 }
423
424 static EventSourceType clock_to_event_source_type(clockid_t clock) {
425
426 switch (clock) {
427
428 case CLOCK_REALTIME:
429 return SOURCE_TIME_REALTIME;
430
431 case CLOCK_BOOTTIME:
432 return SOURCE_TIME_BOOTTIME;
433
434 case CLOCK_MONOTONIC:
435 return SOURCE_TIME_MONOTONIC;
436
437 case CLOCK_REALTIME_ALARM:
438 return SOURCE_TIME_REALTIME_ALARM;
439
440 case CLOCK_BOOTTIME_ALARM:
441 return SOURCE_TIME_BOOTTIME_ALARM;
442
443 default:
444 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
445 }
446 }
447
448 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
449 assert(e);
450
451 switch (t) {
452
453 case SOURCE_TIME_REALTIME:
454 return &e->realtime;
455
456 case SOURCE_TIME_BOOTTIME:
457 return &e->boottime;
458
459 case SOURCE_TIME_MONOTONIC:
460 return &e->monotonic;
461
462 case SOURCE_TIME_REALTIME_ALARM:
463 return &e->realtime_alarm;
464
465 case SOURCE_TIME_BOOTTIME_ALARM:
466 return &e->boottime_alarm;
467
468 default:
469 return NULL;
470 }
471 }
472
473 static int event_make_signal_data(
474 sd_event *e,
475 int sig,
476 struct signal_data **ret) {
477
478 struct epoll_event ev;
479 struct signal_data *d;
480 bool added = false;
481 sigset_t ss_copy;
482 int64_t priority;
483 int r;
484
485 assert(e);
486
487 if (event_pid_changed(e))
488 return -ECHILD;
489
490 if (e->signal_sources && e->signal_sources[sig])
491 priority = e->signal_sources[sig]->priority;
492 else
493 priority = SD_EVENT_PRIORITY_NORMAL;
494
495 d = hashmap_get(e->signal_data, &priority);
496 if (d) {
497 if (sigismember(&d->sigset, sig) > 0) {
498 if (ret)
499 *ret = d;
500 return 0;
501 }
502 } else {
503 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
504 if (r < 0)
505 return r;
506
507 d = new(struct signal_data, 1);
508 if (!d)
509 return -ENOMEM;
510
511 *d = (struct signal_data) {
512 .wakeup = WAKEUP_SIGNAL_DATA,
513 .fd = -1,
514 .priority = priority,
515 };
516
517 r = hashmap_put(e->signal_data, &d->priority, d);
518 if (r < 0) {
519 free(d);
520 return r;
521 }
522
523 added = true;
524 }
525
526 ss_copy = d->sigset;
527 assert_se(sigaddset(&ss_copy, sig) >= 0);
528
529 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
530 if (r < 0) {
531 r = -errno;
532 goto fail;
533 }
534
535 d->sigset = ss_copy;
536
537 if (d->fd >= 0) {
538 if (ret)
539 *ret = d;
540 return 0;
541 }
542
543 d->fd = fd_move_above_stdio(r);
544
545 ev = (struct epoll_event) {
546 .events = EPOLLIN,
547 .data.ptr = d,
548 };
549
550 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
551 if (r < 0) {
552 r = -errno;
553 goto fail;
554 }
555
556 if (ret)
557 *ret = d;
558
559 return 0;
560
561 fail:
562 if (added) {
563 d->fd = safe_close(d->fd);
564 hashmap_remove(e->signal_data, &d->priority);
565 free(d);
566 }
567
568 return r;
569 }
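/* Note on the signalfd() call above: per signalfd(2), passing an existing descriptor
 * (d->fd >= 0) atomically replaces that descriptor's signal mask, while passing -1
 * allocates a new one. That is why the function returns early when d->fd was already
 * valid (the mask update is all that was needed) and only falls through to the epoll
 * registration for a freshly created descriptor. A minimal sketch of the same pattern,
 * outside of this file:
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGUSR1) >= 0);
 *     int fd = signalfd(-1, &ss, SFD_NONBLOCK|SFD_CLOEXEC);    creates a new signalfd
 *     assert_se(sigaddset(&ss, SIGUSR2) >= 0);
 *     (void) signalfd(fd, &ss, SFD_NONBLOCK|SFD_CLOEXEC);      updates the mask in place
 */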
570
571 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
572 assert(e);
573 assert(d);
574
575 /* Turns off the specified signal in the signal data
576 * object. If the signal mask of the object becomes empty
577 * that way, the object is removed. */
578
579 if (sigismember(&d->sigset, sig) == 0)
580 return;
581
582 assert_se(sigdelset(&d->sigset, sig) >= 0);
583
584 if (sigisemptyset(&d->sigset)) {
585
586 /* If the mask is now all-zero we can get rid of the structure */
587 hashmap_remove(e->signal_data, &d->priority);
588 safe_close(d->fd);
589 free(d);
590 return;
591 }
592
593 assert(d->fd >= 0);
594
595 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
596 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
597 }
598
599 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
600 struct signal_data *d;
601 static const int64_t zero_priority = 0;
602
603 assert(e);
604
605 /* Rechecks if the specified signal is still something we are
606 * interested in. If not, we'll unmask it, and possibly drop
607 * the signalfd for it. */
608
609 if (sig == SIGCHLD &&
610 e->n_enabled_child_sources > 0)
611 return;
612
613 if (e->signal_sources &&
614 e->signal_sources[sig] &&
615 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
616 return;
617
618 /*
619 * The specified signal might be enabled in three different queues:
620 *
621 * 1) the one that belongs to the priority passed (if it is non-NULL)
622 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
623 * 3) the 0 priority (to cover the SIGCHLD case)
624 *
625 * Hence, let's remove it from all three here.
626 */
627
628 if (priority) {
629 d = hashmap_get(e->signal_data, priority);
630 if (d)
631 event_unmask_signal_data(e, d, sig);
632 }
633
634 if (e->signal_sources && e->signal_sources[sig]) {
635 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
636 if (d)
637 event_unmask_signal_data(e, d, sig);
638 }
639
640 d = hashmap_get(e->signal_data, &zero_priority);
641 if (d)
642 event_unmask_signal_data(e, d, sig);
643 }
644
645 static void source_disconnect(sd_event_source *s) {
646 sd_event *event;
647
648 assert(s);
649
650 if (!s->event)
651 return;
652
653 assert(s->event->n_sources > 0);
654
655 switch (s->type) {
656
657 case SOURCE_IO:
658 if (s->io.fd >= 0)
659 source_io_unregister(s);
660
661 break;
662
663 case SOURCE_TIME_REALTIME:
664 case SOURCE_TIME_BOOTTIME:
665 case SOURCE_TIME_MONOTONIC:
666 case SOURCE_TIME_REALTIME_ALARM:
667 case SOURCE_TIME_BOOTTIME_ALARM: {
668 struct clock_data *d;
669
670 d = event_get_clock_data(s->event, s->type);
671 assert(d);
672
673 prioq_remove(d->earliest, s, &s->time.earliest_index);
674 prioq_remove(d->latest, s, &s->time.latest_index);
675 d->needs_rearm = true;
676 break;
677 }
678
679 case SOURCE_SIGNAL:
680 if (s->signal.sig > 0) {
681
682 if (s->event->signal_sources)
683 s->event->signal_sources[s->signal.sig] = NULL;
684
685 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
686 }
687
688 break;
689
690 case SOURCE_CHILD:
691 if (s->child.pid > 0) {
692 if (s->enabled != SD_EVENT_OFF) {
693 assert(s->event->n_enabled_child_sources > 0);
694 s->event->n_enabled_child_sources--;
695 }
696
697 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
698 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
699 }
700
701 break;
702
703 case SOURCE_DEFER:
704 /* nothing */
705 break;
706
707 case SOURCE_POST:
708 set_remove(s->event->post_sources, s);
709 break;
710
711 case SOURCE_EXIT:
712 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
713 break;
714
715 case SOURCE_INOTIFY: {
716 struct inode_data *inode_data;
717
718 inode_data = s->inotify.inode_data;
719 if (inode_data) {
720 struct inotify_data *inotify_data;
721 assert_se(inotify_data = inode_data->inotify_data);
722
723 /* Detach this event source from the inode object */
724 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
725 s->inotify.inode_data = NULL;
726
727 if (s->pending) {
728 assert(inotify_data->n_pending > 0);
729 inotify_data->n_pending--;
730 }
731
732 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
733 * continues to be watched. That's because inotify doesn't really have an API for that: we
734 * can only change watch masks with access to the original inode either by fd or by path. But
735 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
736 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
737 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
738 * there), but given the need for open_by_handle_at() which is privileged and not universally
739 * available this would be quite an incomplete solution. Hence we go the other way, leave the
740 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
741 * anymore after reception. Yes, this sucks, but … Linux … */
742
743 /* Maybe release the inode data (and its inotify) */
744 event_gc_inode_data(s->event, inode_data);
745 }
746
747 break;
748 }
749
750 default:
751 assert_not_reached("Wut? I shouldn't exist.");
752 }
753
754 if (s->pending)
755 prioq_remove(s->event->pending, s, &s->pending_index);
756
757 if (s->prepare)
758 prioq_remove(s->event->prepare, s, &s->prepare_index);
759
760 event = s->event;
761
762 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
763 s->event = NULL;
764 LIST_REMOVE(sources, event->sources, s);
765 event->n_sources--;
766
767 if (!s->floating)
768 sd_event_unref(event);
769 }
770
771 static void source_free(sd_event_source *s) {
772 assert(s);
773
774 source_disconnect(s);
775
776 if (s->type == SOURCE_IO && s->io.owned)
777 s->io.fd = safe_close(s->io.fd);
778
779 if (s->destroy_callback)
780 s->destroy_callback(s->userdata);
781
782 free(s->description);
783 free(s);
784 }
785 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
786
787 static int source_set_pending(sd_event_source *s, bool b) {
788 int r;
789
790 assert(s);
791 assert(s->type != SOURCE_EXIT);
792
793 if (s->pending == b)
794 return 0;
795
796 s->pending = b;
797
798 if (b) {
799 s->pending_iteration = s->event->iteration;
800
801 r = prioq_put(s->event->pending, s, &s->pending_index);
802 if (r < 0) {
803 s->pending = false;
804 return r;
805 }
806 } else
807 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
808
809 if (EVENT_SOURCE_IS_TIME(s->type)) {
810 struct clock_data *d;
811
812 d = event_get_clock_data(s->event, s->type);
813 assert(d);
814
815 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
816 prioq_reshuffle(d->latest, s, &s->time.latest_index);
817 d->needs_rearm = true;
818 }
819
820 if (s->type == SOURCE_SIGNAL && !b) {
821 struct signal_data *d;
822
823 d = hashmap_get(s->event->signal_data, &s->priority);
824 if (d && d->current == s)
825 d->current = NULL;
826 }
827
828 if (s->type == SOURCE_INOTIFY) {
829
830 assert(s->inotify.inode_data);
831 assert(s->inotify.inode_data->inotify_data);
832
833 if (b)
834 s->inotify.inode_data->inotify_data->n_pending ++;
835 else {
836 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
837 s->inotify.inode_data->inotify_data->n_pending --;
838 }
839 }
840
841 return 0;
842 }
843
844 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
845 sd_event_source *s;
846
847 assert(e);
848
849 s = new(sd_event_source, 1);
850 if (!s)
851 return NULL;
852
853 *s = (struct sd_event_source) {
854 .n_ref = 1,
855 .event = e,
856 .floating = floating,
857 .type = type,
858 .pending_index = PRIOQ_IDX_NULL,
859 .prepare_index = PRIOQ_IDX_NULL,
860 };
861
862 if (!floating)
863 sd_event_ref(e);
864
865 LIST_PREPEND(sources, e->sources, s);
866 e->n_sources++;
867
868 return s;
869 }
870
871 _public_ int sd_event_add_io(
872 sd_event *e,
873 sd_event_source **ret,
874 int fd,
875 uint32_t events,
876 sd_event_io_handler_t callback,
877 void *userdata) {
878
879 _cleanup_(source_freep) sd_event_source *s = NULL;
880 int r;
881
882 assert_return(e, -EINVAL);
883 assert_return(e = event_resolve(e), -ENOPKG);
884 assert_return(fd >= 0, -EBADF);
885 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
886 assert_return(callback, -EINVAL);
887 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
888 assert_return(!event_pid_changed(e), -ECHILD);
889
890 s = source_new(e, !ret, SOURCE_IO);
891 if (!s)
892 return -ENOMEM;
893
894 s->wakeup = WAKEUP_EVENT_SOURCE;
895 s->io.fd = fd;
896 s->io.events = events;
897 s->io.callback = callback;
898 s->userdata = userdata;
899 s->enabled = SD_EVENT_ON;
900
901 r = source_io_register(s, s->enabled, events);
902 if (r < 0)
903 return r;
904
905 if (ret)
906 *ret = s;
907 TAKE_PTR(s);
908
909 return 0;
910 }
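/* Example usage of sd_event_add_io() (illustrative only; "on_socket_ready" and
 * "listen_fd" are hypothetical):
 *
 *     static int on_socket_ready(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *
 *             if (revents & EPOLLIN)
 *                     (void) read(fd, buf, sizeof(buf));
 *             return 0;
 *     }
 *
 *     sd_event_source *source = NULL;
 *     r = sd_event_add_io(e, &source, listen_fd, EPOLLIN, on_socket_ready, NULL);
 *
 * The source starts out enabled (SD_EVENT_ON). Include EPOLLET in the event mask for
 * edge-triggered behaviour, and use sd_event_source_set_io_fd_own() further down if the
 * event loop should close the fd when the source is freed. */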
911
912 static void initialize_perturb(sd_event *e) {
913 sd_id128_t bootid = {};
914
915 /* When we sleep for longer, we try to realign the wakeup to
916 the same time within each minute/second/250ms, so that
917 events all across the system can be coalesced into a single
918 CPU wakeup. However, let's take some system-specific
919 randomness for this value, so that in a network of systems
920 with synced clocks timer events are distributed a
921 bit. Here, we calculate a perturbation usec offset from the
922 boot ID. */
923
924 if (_likely_(e->perturb != USEC_INFINITY))
925 return;
926
927 if (sd_id128_get_boot(&bootid) >= 0)
928 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
929 }
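/* Worked example of the perturbation above (numbers are illustrative): with
 * e->perturb == 300000, i.e. 0.3s, a timer that is allowed to fire anywhere within a
 * given minute is realigned to fire 0.3s into that minute on this machine, while a
 * machine with a different boot ID picks a different offset. All timers on one machine
 * therefore collapse onto the same per-minute (or per-second, or per-250ms) phase and
 * cause a single CPU wakeup, without all machines on a network with synchronized clocks
 * waking up at exactly the same instant. The actual realignment happens in
 * sleep_between() further down. */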
930
931 static int event_setup_timer_fd(
932 sd_event *e,
933 struct clock_data *d,
934 clockid_t clock) {
935
936 struct epoll_event ev;
937 int r, fd;
938
939 assert(e);
940 assert(d);
941
942 if (_likely_(d->fd >= 0))
943 return 0;
944
945 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
946 if (fd < 0)
947 return -errno;
948
949 fd = fd_move_above_stdio(fd);
950
951 ev = (struct epoll_event) {
952 .events = EPOLLIN,
953 .data.ptr = d,
954 };
955
956 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
957 if (r < 0) {
958 safe_close(fd);
959 return -errno;
960 }
961
962 d->fd = fd;
963 return 0;
964 }
965
966 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
967 assert(s);
968
969 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
970 }
971
972 _public_ int sd_event_add_time(
973 sd_event *e,
974 sd_event_source **ret,
975 clockid_t clock,
976 uint64_t usec,
977 uint64_t accuracy,
978 sd_event_time_handler_t callback,
979 void *userdata) {
980
981 EventSourceType type;
982 _cleanup_(source_freep) sd_event_source *s = NULL;
983 struct clock_data *d;
984 int r;
985
986 assert_return(e, -EINVAL);
987 assert_return(e = event_resolve(e), -ENOPKG);
988 assert_return(accuracy != (uint64_t) -1, -EINVAL);
989 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
990 assert_return(!event_pid_changed(e), -ECHILD);
991
992 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
993 return -EOPNOTSUPP;
994
995 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
996 if (type < 0)
997 return -EOPNOTSUPP;
998
999 if (!callback)
1000 callback = time_exit_callback;
1001
1002 d = event_get_clock_data(e, type);
1003 assert(d);
1004
1005 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1006 if (r < 0)
1007 return r;
1008
1009 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1010 if (r < 0)
1011 return r;
1012
1013 if (d->fd < 0) {
1014 r = event_setup_timer_fd(e, d, clock);
1015 if (r < 0)
1016 return r;
1017 }
1018
1019 s = source_new(e, !ret, type);
1020 if (!s)
1021 return -ENOMEM;
1022
1023 s->time.next = usec;
1024 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1025 s->time.callback = callback;
1026 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1027 s->userdata = userdata;
1028 s->enabled = SD_EVENT_ONESHOT;
1029
1030 d->needs_rearm = true;
1031
1032 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1033 if (r < 0)
1034 return r;
1035
1036 r = prioq_put(d->latest, s, &s->time.latest_index);
1037 if (r < 0)
1038 return r;
1039
1040 if (ret)
1041 *ret = s;
1042 TAKE_PTR(s);
1043
1044 return 0;
1045 }
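/* Example usage of sd_event_add_time() (illustrative; "on_timer" is a hypothetical
 * callback): arm a one-shot timer five seconds from now on the monotonic clock, with
 * the default accuracy.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             log_info("timer elapsed");
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r < 0)
 *             return r;
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 *
 * Passing 0 as accuracy selects DEFAULT_ACCURACY_USEC (250ms); passing a NULL callback
 * turns the source into an "exit on elapse" source via time_exit_callback() above. */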
1046
1047 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1048 assert(s);
1049
1050 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1051 }
1052
1053 _public_ int sd_event_add_signal(
1054 sd_event *e,
1055 sd_event_source **ret,
1056 int sig,
1057 sd_event_signal_handler_t callback,
1058 void *userdata) {
1059
1060 _cleanup_(source_freep) sd_event_source *s = NULL;
1061 struct signal_data *d;
1062 sigset_t ss;
1063 int r;
1064
1065 assert_return(e, -EINVAL);
1066 assert_return(e = event_resolve(e), -ENOPKG);
1067 assert_return(SIGNAL_VALID(sig), -EINVAL);
1068 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1069 assert_return(!event_pid_changed(e), -ECHILD);
1070
1071 if (!callback)
1072 callback = signal_exit_callback;
1073
1074 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1075 if (r != 0)
1076 return -r;
1077
1078 if (!sigismember(&ss, sig))
1079 return -EBUSY;
1080
1081 if (!e->signal_sources) {
1082 e->signal_sources = new0(sd_event_source*, _NSIG);
1083 if (!e->signal_sources)
1084 return -ENOMEM;
1085 } else if (e->signal_sources[sig])
1086 return -EBUSY;
1087
1088 s = source_new(e, !ret, SOURCE_SIGNAL);
1089 if (!s)
1090 return -ENOMEM;
1091
1092 s->signal.sig = sig;
1093 s->signal.callback = callback;
1094 s->userdata = userdata;
1095 s->enabled = SD_EVENT_ON;
1096
1097 e->signal_sources[sig] = s;
1098
1099 r = event_make_signal_data(e, sig, &d);
1100 if (r < 0)
1101 return r;
1102
1103 /* Use the signal name as description for the event source by default */
1104 (void) sd_event_source_set_description(s, signal_to_string(sig));
1105
1106 if (ret)
1107 *ret = s;
1108 TAKE_PTR(s);
1109
1110 return 0;
1111 }
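/* Example usage of sd_event_add_signal(): the handler is reached through a signalfd
 * only, hence the signal must already be blocked in the calling thread, otherwise the
 * pthread_sigmask() check above makes this function return -EBUSY. Illustrative sketch:
 *
 *     sigset_t ss;
 *     assert_se(sigemptyset(&ss) >= 0);
 *     assert_se(sigaddset(&ss, SIGTERM) >= 0);
 *     assert_se(pthread_sigmask(SIG_BLOCK, &ss, NULL) == 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *
 * A NULL callback falls back to signal_exit_callback(), i.e. the event loop exits when
 * SIGTERM arrives. */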
1112
1113 _public_ int sd_event_add_child(
1114 sd_event *e,
1115 sd_event_source **ret,
1116 pid_t pid,
1117 int options,
1118 sd_event_child_handler_t callback,
1119 void *userdata) {
1120
1121 _cleanup_(source_freep) sd_event_source *s = NULL;
1122 int r;
1123
1124 assert_return(e, -EINVAL);
1125 assert_return(e = event_resolve(e), -ENOPKG);
1126 assert_return(pid > 1, -EINVAL);
1127 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1128 assert_return(options != 0, -EINVAL);
1129 assert_return(callback, -EINVAL);
1130 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1131 assert_return(!event_pid_changed(e), -ECHILD);
1132
1133 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1134 if (r < 0)
1135 return r;
1136
1137 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1138 return -EBUSY;
1139
1140 s = source_new(e, !ret, SOURCE_CHILD);
1141 if (!s)
1142 return -ENOMEM;
1143
1144 s->child.pid = pid;
1145 s->child.options = options;
1146 s->child.callback = callback;
1147 s->userdata = userdata;
1148 s->enabled = SD_EVENT_ONESHOT;
1149
1150 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1151 if (r < 0)
1152 return r;
1153
1154 e->n_enabled_child_sources++;
1155
1156 r = event_make_signal_data(e, SIGCHLD, NULL);
1157 if (r < 0) {
1158 e->n_enabled_child_sources--;
1159 return r;
1160 }
1161
1162 e->need_process_child = true;
1163
1164 if (ret)
1165 *ret = s;
1166 TAKE_PTR(s);
1167
1168 return 0;
1169 }
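/* Example usage of sd_event_add_child() (illustrative; "on_child" is a hypothetical
 * callback). The implementation above relies on the SIGCHLD signalfd machinery, so
 * SIGCHLD generally needs to be blocked before the child is forked off, or the
 * notification may be lost:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_info("child exited with status %i", si->si_status);
 *             return 0;
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);    child branch
 *     r = sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 */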
1170
1171 _public_ int sd_event_add_defer(
1172 sd_event *e,
1173 sd_event_source **ret,
1174 sd_event_handler_t callback,
1175 void *userdata) {
1176
1177 _cleanup_(source_freep) sd_event_source *s = NULL;
1178 int r;
1179
1180 assert_return(e, -EINVAL);
1181 assert_return(e = event_resolve(e), -ENOPKG);
1182 assert_return(callback, -EINVAL);
1183 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1184 assert_return(!event_pid_changed(e), -ECHILD);
1185
1186 s = source_new(e, !ret, SOURCE_DEFER);
1187 if (!s)
1188 return -ENOMEM;
1189
1190 s->defer.callback = callback;
1191 s->userdata = userdata;
1192 s->enabled = SD_EVENT_ONESHOT;
1193
1194 r = source_set_pending(s, true);
1195 if (r < 0)
1196 return r;
1197
1198 if (ret)
1199 *ret = s;
1200 TAKE_PTR(s);
1201
1202 return 0;
1203 }
1204
1205 _public_ int sd_event_add_post(
1206 sd_event *e,
1207 sd_event_source **ret,
1208 sd_event_handler_t callback,
1209 void *userdata) {
1210
1211 _cleanup_(source_freep) sd_event_source *s = NULL;
1212 int r;
1213
1214 assert_return(e, -EINVAL);
1215 assert_return(e = event_resolve(e), -ENOPKG);
1216 assert_return(callback, -EINVAL);
1217 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1218 assert_return(!event_pid_changed(e), -ECHILD);
1219
1220 r = set_ensure_allocated(&e->post_sources, NULL);
1221 if (r < 0)
1222 return r;
1223
1224 s = source_new(e, !ret, SOURCE_POST);
1225 if (!s)
1226 return -ENOMEM;
1227
1228 s->post.callback = callback;
1229 s->userdata = userdata;
1230 s->enabled = SD_EVENT_ON;
1231
1232 r = set_put(e->post_sources, s);
1233 if (r < 0)
1234 return r;
1235
1236 if (ret)
1237 *ret = s;
1238 TAKE_PTR(s);
1239
1240 return 0;
1241 }
1242
1243 _public_ int sd_event_add_exit(
1244 sd_event *e,
1245 sd_event_source **ret,
1246 sd_event_handler_t callback,
1247 void *userdata) {
1248
1249 _cleanup_(source_freep) sd_event_source *s = NULL;
1250 int r;
1251
1252 assert_return(e, -EINVAL);
1253 assert_return(e = event_resolve(e), -ENOPKG);
1254 assert_return(callback, -EINVAL);
1255 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1256 assert_return(!event_pid_changed(e), -ECHILD);
1257
1258 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1259 if (r < 0)
1260 return r;
1261
1262 s = source_new(e, !ret, SOURCE_EXIT);
1263 if (!s)
1264 return -ENOMEM;
1265
1266 s->exit.callback = callback;
1267 s->userdata = userdata;
1268 s->exit.prioq_index = PRIOQ_IDX_NULL;
1269 s->enabled = SD_EVENT_ONESHOT;
1270
1271 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1272 if (r < 0)
1273 return r;
1274
1275 if (ret)
1276 *ret = s;
1277 TAKE_PTR(s);
1278
1279 return 0;
1280 }
1281
1282 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1283 assert(e);
1284
1285 if (!d)
1286 return;
1287
1288 assert(hashmap_isempty(d->inodes));
1289 assert(hashmap_isempty(d->wd));
1290
1291 if (d->buffer_filled > 0)
1292 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1293
1294 hashmap_free(d->inodes);
1295 hashmap_free(d->wd);
1296
1297 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1298
1299 if (d->fd >= 0) {
1300 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1301 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1302
1303 safe_close(d->fd);
1304 }
1305 free(d);
1306 }
1307
1308 static int event_make_inotify_data(
1309 sd_event *e,
1310 int64_t priority,
1311 struct inotify_data **ret) {
1312
1313 _cleanup_close_ int fd = -1;
1314 struct inotify_data *d;
1315 struct epoll_event ev;
1316 int r;
1317
1318 assert(e);
1319
1320 d = hashmap_get(e->inotify_data, &priority);
1321 if (d) {
1322 if (ret)
1323 *ret = d;
1324 return 0;
1325 }
1326
1327 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1328 if (fd < 0)
1329 return -errno;
1330
1331 fd = fd_move_above_stdio(fd);
1332
1333 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1334 if (r < 0)
1335 return r;
1336
1337 d = new(struct inotify_data, 1);
1338 if (!d)
1339 return -ENOMEM;
1340
1341 *d = (struct inotify_data) {
1342 .wakeup = WAKEUP_INOTIFY_DATA,
1343 .fd = TAKE_FD(fd),
1344 .priority = priority,
1345 };
1346
1347 r = hashmap_put(e->inotify_data, &d->priority, d);
1348 if (r < 0) {
1349 d->fd = safe_close(d->fd);
1350 free(d);
1351 return r;
1352 }
1353
1354 ev = (struct epoll_event) {
1355 .events = EPOLLIN,
1356 .data.ptr = d,
1357 };
1358
1359 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1360 r = -errno;
1361 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1362 * remove the fd from the epoll first, which we don't want as we couldn't
1363 * add it in the first place. */
1364 event_free_inotify_data(e, d);
1365 return r;
1366 }
1367
1368 if (ret)
1369 *ret = d;
1370
1371 return 1;
1372 }
1373
1374 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1375 int r;
1376
1377 assert(x);
1378 assert(y);
1379
1380 r = CMP(x->dev, y->dev);
1381 if (r != 0)
1382 return r;
1383
1384 return CMP(x->ino, y->ino);
1385 }
1386
1387 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1388 assert(d);
1389
1390 siphash24_compress(&d->dev, sizeof(d->dev), state);
1391 siphash24_compress(&d->ino, sizeof(d->ino), state);
1392 }
1393
1394 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1395
1396 static void event_free_inode_data(
1397 sd_event *e,
1398 struct inode_data *d) {
1399
1400 assert(e);
1401
1402 if (!d)
1403 return;
1404
1405 assert(!d->event_sources);
1406
1407 if (d->fd >= 0) {
1408 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1409 safe_close(d->fd);
1410 }
1411
1412 if (d->inotify_data) {
1413
1414 if (d->wd >= 0) {
1415 if (d->inotify_data->fd >= 0) {
1416 /* So here's a problem. At the time this runs the watch descriptor might already be
1417 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1418 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1419 * likely case to happen. */
1420
1421 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1422 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1423 }
1424
1425 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1426 }
1427
1428 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1429 }
1430
1431 free(d);
1432 }
1433
1434 static void event_gc_inode_data(
1435 sd_event *e,
1436 struct inode_data *d) {
1437
1438 struct inotify_data *inotify_data;
1439
1440 assert(e);
1441
1442 if (!d)
1443 return;
1444
1445 if (d->event_sources)
1446 return;
1447
1448 inotify_data = d->inotify_data;
1449 event_free_inode_data(e, d);
1450
1451 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1452 event_free_inotify_data(e, inotify_data);
1453 }
1454
1455 static int event_make_inode_data(
1456 sd_event *e,
1457 struct inotify_data *inotify_data,
1458 dev_t dev,
1459 ino_t ino,
1460 struct inode_data **ret) {
1461
1462 struct inode_data *d, key;
1463 int r;
1464
1465 assert(e);
1466 assert(inotify_data);
1467
1468 key = (struct inode_data) {
1469 .ino = ino,
1470 .dev = dev,
1471 };
1472
1473 d = hashmap_get(inotify_data->inodes, &key);
1474 if (d) {
1475 if (ret)
1476 *ret = d;
1477
1478 return 0;
1479 }
1480
1481 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1482 if (r < 0)
1483 return r;
1484
1485 d = new(struct inode_data, 1);
1486 if (!d)
1487 return -ENOMEM;
1488
1489 *d = (struct inode_data) {
1490 .dev = dev,
1491 .ino = ino,
1492 .wd = -1,
1493 .fd = -1,
1494 .inotify_data = inotify_data,
1495 };
1496
1497 r = hashmap_put(inotify_data->inodes, d, d);
1498 if (r < 0) {
1499 free(d);
1500 return r;
1501 }
1502
1503 if (ret)
1504 *ret = d;
1505
1506 return 1;
1507 }
1508
1509 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1510 bool excl_unlink = true;
1511 uint32_t combined = 0;
1512 sd_event_source *s;
1513
1514 assert(d);
1515
1516 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1517 * the IN_EXCL_UNLINK flag is ANDed instead.
1518 *
1519 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1520 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1521 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1522 * events we don't care for client-side. */
1523
1524 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1525
1526 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1527 excl_unlink = false;
1528
1529 combined |= s->inotify.mask;
1530 }
1531
1532 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1533 }
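/* Worked example for the combination above: if one source watches the inode with
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and another with IN_MOVED_TO (without IN_EXCL_UNLINK),
 * the realized kernel mask becomes IN_CLOSE_WRITE|IN_MOVED_TO. The event bits are ORed,
 * but IN_EXCL_UNLINK is only kept if every source asked for it, since it narrows what
 * the kernel reports and ORing it in would hide events another source still wants.
 * Filtering out the surplus events per source happens later at dispatch time. */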
1534
1535 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1536 uint32_t combined_mask;
1537 int wd, r;
1538
1539 assert(d);
1540 assert(d->fd >= 0);
1541
1542 combined_mask = inode_data_determine_mask(d);
1543
1544 if (d->wd >= 0 && combined_mask == d->combined_mask)
1545 return 0;
1546
1547 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1548 if (r < 0)
1549 return r;
1550
1551 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1552 if (wd < 0)
1553 return -errno;
1554
1555 if (d->wd < 0) {
1556 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1557 if (r < 0) {
1558 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1559 return r;
1560 }
1561
1562 d->wd = wd;
1563
1564 } else if (d->wd != wd) {
1565
1566 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1567 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1568 return -EINVAL;
1569 }
1570
1571 d->combined_mask = combined_mask;
1572 return 1;
1573 }
1574
1575 _public_ int sd_event_add_inotify(
1576 sd_event *e,
1577 sd_event_source **ret,
1578 const char *path,
1579 uint32_t mask,
1580 sd_event_inotify_handler_t callback,
1581 void *userdata) {
1582
1583 struct inotify_data *inotify_data = NULL;
1584 struct inode_data *inode_data = NULL;
1585 _cleanup_close_ int fd = -1;
1586 _cleanup_(source_freep) sd_event_source *s = NULL;
1587 struct stat st;
1588 int r;
1589
1590 assert_return(e, -EINVAL);
1591 assert_return(e = event_resolve(e), -ENOPKG);
1592 assert_return(path, -EINVAL);
1593 assert_return(callback, -EINVAL);
1594 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1595 assert_return(!event_pid_changed(e), -ECHILD);
1596
1597 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1598 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1599 * the user can't use them for us. */
1600 if (mask & IN_MASK_ADD)
1601 return -EINVAL;
1602
1603 fd = open(path, O_PATH|O_CLOEXEC|
1604 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1605 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1606 if (fd < 0)
1607 return -errno;
1608
1609 if (fstat(fd, &st) < 0)
1610 return -errno;
1611
1612 s = source_new(e, !ret, SOURCE_INOTIFY);
1613 if (!s)
1614 return -ENOMEM;
1615
1616 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1617 s->inotify.mask = mask;
1618 s->inotify.callback = callback;
1619 s->userdata = userdata;
1620
1621 /* Allocate an inotify object for this priority, and an inode object within it */
1622 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1623 if (r < 0)
1624 return r;
1625
1626 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1627 if (r < 0) {
1628 event_free_inotify_data(e, inotify_data);
1629 return r;
1630 }
1631
1632 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority
1633 * of the event source until then; for that we need the original inode. */
1634 if (inode_data->fd < 0) {
1635 inode_data->fd = TAKE_FD(fd);
1636 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1637 }
1638
1639 /* Link our event source to the inode data object */
1640 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1641 s->inotify.inode_data = inode_data;
1642
1643 /* Actually realize the watch now */
1644 r = inode_data_realize_watch(e, inode_data);
1645 if (r < 0)
1646 return r;
1647
1648 (void) sd_event_source_set_description(s, path);
1649
1650 if (ret)
1651 *ret = s;
1652 TAKE_PTR(s);
1653
1654 return 0;
1655 }
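/* Example usage of sd_event_add_inotify() (illustrative; "on_inotify" is a hypothetical
 * callback and "/run/mydir" a hypothetical path): watch a directory for files renamed
 * in or written and closed.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->len > 0)
 *                     log_info("inotify event on %s", ev->name);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/mydir", IN_MOVED_TO|IN_CLOSE_WRITE, on_inotify, NULL);
 *
 * Watches on the same inode are coalesced into a single kernel watch descriptor per
 * priority by the inode_data/inotify_data machinery above. */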
1656
1657 static sd_event_source* event_source_free(sd_event_source *s) {
1658 if (!s)
1659 return NULL;
1660
1661 /* Here's a special hack: when we are called from a
1662 * dispatch handler we won't free the event source
1663 * immediately, but we will detach the fd from the
1664 * epoll. This way it is safe for the caller to unref
1665 * the event source and immediately close the fd, but
1666 * we still retain a valid event source object after
1667 * the callback. */
1668
1669 if (s->dispatching) {
1670 if (s->type == SOURCE_IO)
1671 source_io_unregister(s);
1672
1673 source_disconnect(s);
1674 } else
1675 source_free(s);
1676
1677 return NULL;
1678 }
1679
1680 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1681
1682 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1683 assert_return(s, -EINVAL);
1684 assert_return(!event_pid_changed(s->event), -ECHILD);
1685
1686 return free_and_strdup(&s->description, description);
1687 }
1688
1689 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1690 assert_return(s, -EINVAL);
1691 assert_return(description, -EINVAL);
1692 assert_return(!event_pid_changed(s->event), -ECHILD);
1693
1694 if (!s->description)
1695 return -ENXIO;
1696
1697 *description = s->description;
1698 return 0;
1699 }
1700
1701 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1702 assert_return(s, NULL);
1703
1704 return s->event;
1705 }
1706
1707 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1708 assert_return(s, -EINVAL);
1709 assert_return(s->type != SOURCE_EXIT, -EDOM);
1710 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1711 assert_return(!event_pid_changed(s->event), -ECHILD);
1712
1713 return s->pending;
1714 }
1715
1716 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1717 assert_return(s, -EINVAL);
1718 assert_return(s->type == SOURCE_IO, -EDOM);
1719 assert_return(!event_pid_changed(s->event), -ECHILD);
1720
1721 return s->io.fd;
1722 }
1723
1724 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1725 int r;
1726
1727 assert_return(s, -EINVAL);
1728 assert_return(fd >= 0, -EBADF);
1729 assert_return(s->type == SOURCE_IO, -EDOM);
1730 assert_return(!event_pid_changed(s->event), -ECHILD);
1731
1732 if (s->io.fd == fd)
1733 return 0;
1734
1735 if (s->enabled == SD_EVENT_OFF) {
1736 s->io.fd = fd;
1737 s->io.registered = false;
1738 } else {
1739 int saved_fd;
1740
1741 saved_fd = s->io.fd;
1742 assert(s->io.registered);
1743
1744 s->io.fd = fd;
1745 s->io.registered = false;
1746
1747 r = source_io_register(s, s->enabled, s->io.events);
1748 if (r < 0) {
1749 s->io.fd = saved_fd;
1750 s->io.registered = true;
1751 return r;
1752 }
1753
1754 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1755 }
1756
1757 return 0;
1758 }
1759
1760 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1761 assert_return(s, -EINVAL);
1762 assert_return(s->type == SOURCE_IO, -EDOM);
1763
1764 return s->io.owned;
1765 }
1766
1767 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1768 assert_return(s, -EINVAL);
1769 assert_return(s->type == SOURCE_IO, -EDOM);
1770
1771 s->io.owned = own;
1772 return 0;
1773 }
1774
1775 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1776 assert_return(s, -EINVAL);
1777 assert_return(events, -EINVAL);
1778 assert_return(s->type == SOURCE_IO, -EDOM);
1779 assert_return(!event_pid_changed(s->event), -ECHILD);
1780
1781 *events = s->io.events;
1782 return 0;
1783 }
1784
1785 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1786 int r;
1787
1788 assert_return(s, -EINVAL);
1789 assert_return(s->type == SOURCE_IO, -EDOM);
1790 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1791 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1792 assert_return(!event_pid_changed(s->event), -ECHILD);
1793
1794 /* edge-triggered updates are never skipped, so we can reset edges */
1795 if (s->io.events == events && !(events & EPOLLET))
1796 return 0;
1797
1798 r = source_set_pending(s, false);
1799 if (r < 0)
1800 return r;
1801
1802 if (s->enabled != SD_EVENT_OFF) {
1803 r = source_io_register(s, s->enabled, events);
1804 if (r < 0)
1805 return r;
1806 }
1807
1808 s->io.events = events;
1809
1810 return 0;
1811 }
1812
1813 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1814 assert_return(s, -EINVAL);
1815 assert_return(revents, -EINVAL);
1816 assert_return(s->type == SOURCE_IO, -EDOM);
1817 assert_return(s->pending, -ENODATA);
1818 assert_return(!event_pid_changed(s->event), -ECHILD);
1819
1820 *revents = s->io.revents;
1821 return 0;
1822 }
1823
1824 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1825 assert_return(s, -EINVAL);
1826 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1827 assert_return(!event_pid_changed(s->event), -ECHILD);
1828
1829 return s->signal.sig;
1830 }
1831
1832 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1833 assert_return(s, -EINVAL);
1834 assert_return(!event_pid_changed(s->event), -ECHILD);
1835
1836 *priority = s->priority;
1837 return 0;
1838 }
1839
1840 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1841 bool rm_inotify = false, rm_inode = false;
1842 struct inotify_data *new_inotify_data = NULL;
1843 struct inode_data *new_inode_data = NULL;
1844 int r;
1845
1846 assert_return(s, -EINVAL);
1847 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1848 assert_return(!event_pid_changed(s->event), -ECHILD);
1849
1850 if (s->priority == priority)
1851 return 0;
1852
1853 if (s->type == SOURCE_INOTIFY) {
1854 struct inode_data *old_inode_data;
1855
1856 assert(s->inotify.inode_data);
1857 old_inode_data = s->inotify.inode_data;
1858
1859 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
1860 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1861 * events we allow priority changes only until the first following iteration. */
1862 if (old_inode_data->fd < 0)
1863 return -EOPNOTSUPP;
1864
1865 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1866 if (r < 0)
1867 return r;
1868 rm_inotify = r > 0;
1869
1870 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1871 if (r < 0)
1872 goto fail;
1873 rm_inode = r > 0;
1874
1875 if (new_inode_data->fd < 0) {
1876 /* Duplicate the fd for the new inode object if we don't have any yet */
1877 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1878 if (new_inode_data->fd < 0) {
1879 r = -errno;
1880 goto fail;
1881 }
1882
1883 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1884 }
1885
1886 /* Move the event source to the new inode data structure */
1887 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1888 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1889 s->inotify.inode_data = new_inode_data;
1890
1891 /* Now create the new watch */
1892 r = inode_data_realize_watch(s->event, new_inode_data);
1893 if (r < 0) {
1894 /* Move it back */
1895 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1896 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1897 s->inotify.inode_data = old_inode_data;
1898 goto fail;
1899 }
1900
1901 s->priority = priority;
1902
1903 event_gc_inode_data(s->event, old_inode_data);
1904
1905 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1906 struct signal_data *old, *d;
1907
1908 /* Move us from the signalfd belonging to the old
1909 * priority to the signalfd of the new priority */
1910
1911 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1912
1913 s->priority = priority;
1914
1915 r = event_make_signal_data(s->event, s->signal.sig, &d);
1916 if (r < 0) {
1917 s->priority = old->priority;
1918 return r;
1919 }
1920
1921 event_unmask_signal_data(s->event, old, s->signal.sig);
1922 } else
1923 s->priority = priority;
1924
1925 if (s->pending)
1926 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1927
1928 if (s->prepare)
1929 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1930
1931 if (s->type == SOURCE_EXIT)
1932 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1933
1934 return 0;
1935
1936 fail:
1937 if (rm_inode)
1938 event_free_inode_data(s->event, new_inode_data);
1939
1940 if (rm_inotify)
1941 event_free_inotify_data(s->event, new_inotify_data);
1942
1943 return r;
1944 }
1945
1946 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1947 assert_return(s, -EINVAL);
1948 assert_return(!event_pid_changed(s->event), -ECHILD);
1949
1950 if (m)
1951 *m = s->enabled;
1952 return s->enabled != SD_EVENT_OFF;
1953 }
1954
1955 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1956 int r;
1957
1958 assert_return(s, -EINVAL);
1959 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1960 assert_return(!event_pid_changed(s->event), -ECHILD);
1961
1962 /* If we are dead anyway, we are fine with turning off
1963 * sources, but everything else needs to fail. */
1964 if (s->event->state == SD_EVENT_FINISHED)
1965 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1966
1967 if (s->enabled == m)
1968 return 0;
1969
1970 if (m == SD_EVENT_OFF) {
1971
1972 /* Unset the pending flag when this event source is disabled */
1973 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1974 r = source_set_pending(s, false);
1975 if (r < 0)
1976 return r;
1977 }
1978
1979 switch (s->type) {
1980
1981 case SOURCE_IO:
1982 source_io_unregister(s);
1983 s->enabled = m;
1984 break;
1985
1986 case SOURCE_TIME_REALTIME:
1987 case SOURCE_TIME_BOOTTIME:
1988 case SOURCE_TIME_MONOTONIC:
1989 case SOURCE_TIME_REALTIME_ALARM:
1990 case SOURCE_TIME_BOOTTIME_ALARM: {
1991 struct clock_data *d;
1992
1993 s->enabled = m;
1994 d = event_get_clock_data(s->event, s->type);
1995 assert(d);
1996
1997 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
1998 prioq_reshuffle(d->latest, s, &s->time.latest_index);
1999 d->needs_rearm = true;
2000 break;
2001 }
2002
2003 case SOURCE_SIGNAL:
2004 s->enabled = m;
2005
2006 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2007 break;
2008
2009 case SOURCE_CHILD:
2010 s->enabled = m;
2011
2012 assert(s->event->n_enabled_child_sources > 0);
2013 s->event->n_enabled_child_sources--;
2014
2015 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2016 break;
2017
2018 case SOURCE_EXIT:
2019 s->enabled = m;
2020 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2021 break;
2022
2023 case SOURCE_DEFER:
2024 case SOURCE_POST:
2025 case SOURCE_INOTIFY:
2026 s->enabled = m;
2027 break;
2028
2029 default:
2030 assert_not_reached("Wut? I shouldn't exist.");
2031 }
2032
2033 } else {
2034
2035 /* Unset the pending flag when this event source is enabled */
2036 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2037 r = source_set_pending(s, false);
2038 if (r < 0)
2039 return r;
2040 }
2041
2042 switch (s->type) {
2043
2044 case SOURCE_IO:
2045 r = source_io_register(s, m, s->io.events);
2046 if (r < 0)
2047 return r;
2048
2049 s->enabled = m;
2050 break;
2051
2052 case SOURCE_TIME_REALTIME:
2053 case SOURCE_TIME_BOOTTIME:
2054 case SOURCE_TIME_MONOTONIC:
2055 case SOURCE_TIME_REALTIME_ALARM:
2056 case SOURCE_TIME_BOOTTIME_ALARM: {
2057 struct clock_data *d;
2058
2059 s->enabled = m;
2060 d = event_get_clock_data(s->event, s->type);
2061 assert(d);
2062
2063 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2064 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2065 d->needs_rearm = true;
2066 break;
2067 }
2068
2069 case SOURCE_SIGNAL:
2070
2071 s->enabled = m;
2072
2073 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2074 if (r < 0) {
2075 s->enabled = SD_EVENT_OFF;
2076 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2077 return r;
2078 }
2079
2080 break;
2081
2082 case SOURCE_CHILD:
2083
2084 if (s->enabled == SD_EVENT_OFF)
2085 s->event->n_enabled_child_sources++;
2086
2087 s->enabled = m;
2088
2089 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2090 if (r < 0) {
2091 s->enabled = SD_EVENT_OFF;
2092 s->event->n_enabled_child_sources--;
2093 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2094 return r;
2095 }
2096
2097 break;
2098
2099 case SOURCE_EXIT:
2100 s->enabled = m;
2101 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2102 break;
2103
2104 case SOURCE_DEFER:
2105 case SOURCE_POST:
2106 case SOURCE_INOTIFY:
2107 s->enabled = m;
2108 break;
2109
2110 default:
2111 assert_not_reached("Wut? I shouldn't exist.");
2112 }
2113 }
2114
2115 if (s->pending)
2116 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2117
2118 if (s->prepare)
2119 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2120
2121 return 0;
2122 }
2123
2124 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2125 assert_return(s, -EINVAL);
2126 assert_return(usec, -EINVAL);
2127 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2128 assert_return(!event_pid_changed(s->event), -ECHILD);
2129
2130 *usec = s->time.next;
2131 return 0;
2132 }
2133
2134 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2135 struct clock_data *d;
2136 int r;
2137
2138 assert_return(s, -EINVAL);
2139 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2140 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2141 assert_return(!event_pid_changed(s->event), -ECHILD);
2142
2143 r = source_set_pending(s, false);
2144 if (r < 0)
2145 return r;
2146
2147 s->time.next = usec;
2148
2149 d = event_get_clock_data(s->event, s->type);
2150 assert(d);
2151
2152 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2153 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2154 d->needs_rearm = true;
2155
2156 return 0;
2157 }
2158
2159 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2160 assert_return(s, -EINVAL);
2161 assert_return(usec, -EINVAL);
2162 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2163 assert_return(!event_pid_changed(s->event), -ECHILD);
2164
2165 *usec = s->time.accuracy;
2166 return 0;
2167 }
2168
2169 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2170 struct clock_data *d;
2171 int r;
2172
2173 assert_return(s, -EINVAL);
2174 assert_return(usec != (uint64_t) -1, -EINVAL);
2175 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2176 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2177 assert_return(!event_pid_changed(s->event), -ECHILD);
2178
2179 r = source_set_pending(s, false);
2180 if (r < 0)
2181 return r;
2182
2183 if (usec == 0)
2184 usec = DEFAULT_ACCURACY_USEC;
2185
2186 s->time.accuracy = usec;
2187
2188 d = event_get_clock_data(s->event, s->type);
2189 assert(d);
2190
2191 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2192 d->needs_rearm = true;
2193
2194 return 0;
2195 }
2196
2197 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2198 assert_return(s, -EINVAL);
2199 assert_return(clock, -EINVAL);
2200 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2201 assert_return(!event_pid_changed(s->event), -ECHILD);
2202
2203 *clock = event_source_type_to_clock(s->type);
2204 return 0;
2205 }
2206
2207 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2208 assert_return(s, -EINVAL);
2209 assert_return(pid, -EINVAL);
2210 assert_return(s->type == SOURCE_CHILD, -EDOM);
2211 assert_return(!event_pid_changed(s->event), -ECHILD);
2212
2213 *pid = s->child.pid;
2214 return 0;
2215 }
2216
2217 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2218 assert_return(s, -EINVAL);
2219 assert_return(mask, -EINVAL);
2220 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2221 assert_return(!event_pid_changed(s->event), -ECHILD);
2222
2223 *mask = s->inotify.mask;
2224 return 0;
2225 }
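
/* Illustrative sketch (not part of the original source): the mask returned above is the one the
 * caller passed to sd_event_add_inotify() when creating the source. Path, mask and handler below
 * are made up for the example.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify event 0x%x, name: %s", ev->mask, ev->len > 0 ? ev->name : "(watched inode itself)");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_DELETE|IN_MOVED_TO, on_inotify, NULL);
 */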
2226
2227 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2228 int r;
2229
2230 assert_return(s, -EINVAL);
2231 assert_return(s->type != SOURCE_EXIT, -EDOM);
2232 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2233 assert_return(!event_pid_changed(s->event), -ECHILD);
2234
2235 if (s->prepare == callback)
2236 return 0;
2237
2238 if (callback && s->prepare) {
2239 s->prepare = callback;
2240 return 0;
2241 }
2242
2243 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2244 if (r < 0)
2245 return r;
2246
2247 s->prepare = callback;
2248
2249 if (callback) {
2250 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2251 if (r < 0)
2252 return r;
2253 } else
2254 prioq_remove(s->event->prepare, s, &s->prepare_index);
2255
2256 return 0;
2257 }
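
/* Illustrative sketch (not part of the original source): a prepare callback runs in event_prepare()
 * right before the loop goes to sleep, and is commonly used to enable or disable a source based on
 * state that only settles late. The names below are made up for the example.
 *
 *     static int on_prepare(sd_event_source *s, void *userdata) {
 *             bool *have_work = userdata;
 *
 *             // Only let this defer source wake the loop when work is actually queued.
 *             return sd_event_source_set_enabled(s, *have_work ? SD_EVENT_ON : SD_EVENT_OFF);
 *     }
 *
 *     r = sd_event_source_set_prepare(defer_source, on_prepare);
 */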
2258
2259 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2260 assert_return(s, NULL);
2261
2262 return s->userdata;
2263 }
2264
2265 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2266 void *ret;
2267
2268 assert_return(s, NULL);
2269
2270 ret = s->userdata;
2271 s->userdata = userdata;
2272
2273 return ret;
2274 }
2275
2276 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2277 usec_t c;
2278 assert(e);
2279 assert(a <= b);
2280
2281 if (a <= 0)
2282 return 0;
2283 if (a >= USEC_INFINITY)
2284 return USEC_INFINITY;
2285
2286 if (b <= a + 1)
2287 return a;
2288
2289 initialize_perturb(e);
2290
2291 /*
2292 Find a good time to wake up again between times a and b. We
2293 have two goals here:
2294
2295 a) We want to wake up as seldom as possible, hence prefer
2296 later times over earlier times.
2297
2298 b) But if we have to wake up, then let's make sure to
2299 dispatch as much as possible on the entire system.
2300
2301 We implement this by waking up everywhere at the same time
2302 within any given minute if we can, synchronised via the
2303 perturbation value determined from the boot ID. If we can't,
2304 then we try to find the same spot in every 10s, then 1s and
2305 then 250ms steps. Otherwise, we pick the last possible time
2306 to wake up.
2307 */
2308
2309 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2310 if (c >= b) {
2311 if (_unlikely_(c < USEC_PER_MINUTE))
2312 return b;
2313
2314 c -= USEC_PER_MINUTE;
2315 }
2316
2317 if (c >= a)
2318 return c;
2319
2320 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2321 if (c >= b) {
2322 if (_unlikely_(c < USEC_PER_SEC*10))
2323 return b;
2324
2325 c -= USEC_PER_SEC*10;
2326 }
2327
2328 if (c >= a)
2329 return c;
2330
2331 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2332 if (c >= b) {
2333 if (_unlikely_(c < USEC_PER_SEC))
2334 return b;
2335
2336 c -= USEC_PER_SEC;
2337 }
2338
2339 if (c >= a)
2340 return c;
2341
2342 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2343 if (c >= b) {
2344 if (_unlikely_(c < USEC_PER_MSEC*250))
2345 return b;
2346
2347 c -= USEC_PER_MSEC*250;
2348 }
2349
2350 if (c >= a)
2351 return c;
2352
2353 return b;
2354 }
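
/* Worked example for the coalescing logic above (not part of the original source), assuming a
 * per-boot perturbation of 13s: for a window a = 08:00:05.0, b = 08:01:10.0 the minute-granularity
 * candidate is 08:01:13.0; that is past b, so one minute is subtracted, giving 08:00:13.0, which is
 * >= a and therefore returned. Every loop on this machine whose window covers that point wakes up
 * together, 13s into the minute. Only when no candidate fits the window do we fall through to the
 * 10s, 1s and 250ms grids, and finally to b itself. */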
2355
2356 static int event_arm_timer(
2357 sd_event *e,
2358 struct clock_data *d) {
2359
2360 struct itimerspec its = {};
2361 sd_event_source *a, *b;
2362 usec_t t;
2363 int r;
2364
2365 assert(e);
2366 assert(d);
2367
2368 if (!d->needs_rearm)
2369 return 0;
2370 else
2371 d->needs_rearm = false;
2372
2373 a = prioq_peek(d->earliest);
2374 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2375
2376 if (d->fd < 0)
2377 return 0;
2378
2379 if (d->next == USEC_INFINITY)
2380 return 0;
2381
2382 /* disarm */
2383 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2384 if (r < 0)
2385 return r;
2386
2387 d->next = USEC_INFINITY;
2388 return 0;
2389 }
2390
2391 b = prioq_peek(d->latest);
2392 assert_se(b && b->enabled != SD_EVENT_OFF);
2393
2394 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2395 if (d->next == t)
2396 return 0;
2397
2398 assert_se(d->fd >= 0);
2399
2400 if (t == 0) {
2401 /* We don't want to disarm here, just to set the timer to some time looooong ago. */
2402 its.it_value.tv_sec = 0;
2403 its.it_value.tv_nsec = 1;
2404 } else
2405 timespec_store(&its.it_value, t);
2406
2407 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2408 if (r < 0)
2409 return -errno;
2410
2411 d->next = t;
2412 return 0;
2413 }
2414
2415 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2416 assert(e);
2417 assert(s);
2418 assert(s->type == SOURCE_IO);
2419
2420 /* If the event source was already pending, we just OR in the
2421 * new revents, otherwise we reset the value. The ORing is
2422 * necessary to handle EPOLLONESHOT events properly where
2423 * readability might happen independently of writability, and
2424 * we need to keep track of both */
2425
2426 if (s->pending)
2427 s->io.revents |= revents;
2428 else
2429 s->io.revents = revents;
2430
2431 return source_set_pending(s, true);
2432 }
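
/* Illustrative sketch (not part of the original source): the revents accumulated above are what an
 * I/O callback installed with sd_event_add_io() eventually receives. The fd and handler below are
 * made up for the example.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & (EPOLLHUP|EPOLLERR))
 *                     return sd_event_source_set_enabled(s, SD_EVENT_OFF);
 *             if (revents & EPOLLIN) {
 *                     // read from fd here
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, NULL, some_fd, EPOLLIN, on_io, NULL);
 */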
2433
2434 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2435 uint64_t x;
2436 ssize_t ss;
2437
2438 assert(e);
2439 assert(fd >= 0);
2440
2441 assert_return(events == EPOLLIN, -EIO);
2442
2443 ss = read(fd, &x, sizeof(x));
2444 if (ss < 0) {
2445 if (IN_SET(errno, EAGAIN, EINTR))
2446 return 0;
2447
2448 return -errno;
2449 }
2450
2451 if (_unlikely_(ss != sizeof(x)))
2452 return -EIO;
2453
2454 if (next)
2455 *next = USEC_INFINITY;
2456
2457 return 0;
2458 }
2459
2460 static int process_timer(
2461 sd_event *e,
2462 usec_t n,
2463 struct clock_data *d) {
2464
2465 sd_event_source *s;
2466 int r;
2467
2468 assert(e);
2469 assert(d);
2470
2471 for (;;) {
2472 s = prioq_peek(d->earliest);
2473 if (!s ||
2474 s->time.next > n ||
2475 s->enabled == SD_EVENT_OFF ||
2476 s->pending)
2477 break;
2478
2479 r = source_set_pending(s, true);
2480 if (r < 0)
2481 return r;
2482
2483 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2484 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2485 d->needs_rearm = true;
2486 }
2487
2488 return 0;
2489 }
2490
2491 static int process_child(sd_event *e) {
2492 sd_event_source *s;
2493 Iterator i;
2494 int r;
2495
2496 assert(e);
2497
2498 e->need_process_child = false;
2499
2500 /*
2501 So, this is ugly. We iteratively invoke waitid() with P_PID
2502 + WNOHANG for each PID we wait for, instead of using
2503 P_ALL. This is because we only want to get child
2504 information of very specific child processes, and not all
2505 of them. We might not have processed the SIGCHLD event of a
2506 previous invocation and we don't want to maintain an
2507 unbounded *per-child* event queue, hence we really don't
2508 want anything flushed out of the kernel's queue that we
2509 don't care about. Since this is O(n), this means that if you
2510 have a lot of processes you probably want to handle SIGCHLD
2511 yourself.
2512
2513 We do not reap the children here (by using WNOWAIT), this
2514 is only done after the event source is dispatched so that
2515 the callback still sees the process as a zombie.
2516 */
2517
2518 HASHMAP_FOREACH(s, e->child_sources, i) {
2519 assert(s->type == SOURCE_CHILD);
2520
2521 if (s->pending)
2522 continue;
2523
2524 if (s->enabled == SD_EVENT_OFF)
2525 continue;
2526
2527 zero(s->child.siginfo);
2528 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2529 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2530 if (r < 0)
2531 return -errno;
2532
2533 if (s->child.siginfo.si_pid != 0) {
2534 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2535
2536 if (!zombie && (s->child.options & WEXITED)) {
2537 /* If the child isn't dead then let's
2538 * immediately remove the state change
2539 * from the queue, since there's no
2540 * benefit in leaving it queued */
2541
2542 assert(s->child.options & (WSTOPPED|WCONTINUED));
2543 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2544 }
2545
2546 r = source_set_pending(s, true);
2547 if (r < 0)
2548 return r;
2549 }
2550 }
2551
2552 return 0;
2553 }
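
/* Illustrative sketch (not part of the original source): the child sources iterated above are created
 * with sd_event_add_child(), which requires SIGCHLD to be blocked in the calling thread. The handler
 * and PID below are made up for the example; as noted above, the child is still a zombie while the
 * callback runs and is only reaped afterwards.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("Child %i ended, si_code=%i, si_status=%i", (int) si->si_pid, si->si_code, si->si_status);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL);
 */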
2554
2555 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2556 bool read_one = false;
2557 int r;
2558
2559 assert(e);
2560 assert(d);
2561 assert_return(events == EPOLLIN, -EIO);
2562
2563 /* If there's a signal queued on this priority and SIGCHLD is
2564 on this priority too, then make sure to recheck the
2565 children we watch. This is because we only ever dequeue
2566 the first signal per priority, and if we dequeue one while
2567 SIGCHLD is queued up behind it we wouldn't notice, even though
2568 we might have higher priority children we care about, hence we
2569 need to check that explicitly. */
2570
2571 if (sigismember(&d->sigset, SIGCHLD))
2572 e->need_process_child = true;
2573
2574 /* If there's already an event source pending for this
2575 * priority we don't read another */
2576 if (d->current)
2577 return 0;
2578
2579 for (;;) {
2580 struct signalfd_siginfo si;
2581 ssize_t n;
2582 sd_event_source *s = NULL;
2583
2584 n = read(d->fd, &si, sizeof(si));
2585 if (n < 0) {
2586 if (IN_SET(errno, EAGAIN, EINTR))
2587 return read_one;
2588
2589 return -errno;
2590 }
2591
2592 if (_unlikely_(n != sizeof(si)))
2593 return -EIO;
2594
2595 assert(SIGNAL_VALID(si.ssi_signo));
2596
2597 read_one = true;
2598
2599 if (e->signal_sources)
2600 s = e->signal_sources[si.ssi_signo];
2601 if (!s)
2602 continue;
2603 if (s->pending)
2604 continue;
2605
2606 s->signal.siginfo = si;
2607 d->current = s;
2608
2609 r = source_set_pending(s, true);
2610 if (r < 0)
2611 return r;
2612
2613 return 1;
2614 }
2615 }
2616
2617 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2618 ssize_t n;
2619
2620 assert(e);
2621 assert(d);
2622
2623 assert_return(revents == EPOLLIN, -EIO);
2624
2625 /* If there's already an event source pending for this priority, don't read another */
2626 if (d->n_pending > 0)
2627 return 0;
2628
2629 /* Is the read buffer non-empty? If so, let's not read more */
2630 if (d->buffer_filled > 0)
2631 return 0;
2632
2633 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2634 if (n < 0) {
2635 if (IN_SET(errno, EAGAIN, EINTR))
2636 return 0;
2637
2638 return -errno;
2639 }
2640
2641 assert(n > 0);
2642 d->buffer_filled = (size_t) n;
2643 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2644
2645 return 1;
2646 }
2647
2648 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2649 assert(e);
2650 assert(d);
2651 assert(sz <= d->buffer_filled);
2652
2653 if (sz == 0)
2654 return;
2655
2656 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2657 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2658 d->buffer_filled -= sz;
2659
2660 if (d->buffer_filled == 0)
2661 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2662 }
2663
2664 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2665 int r;
2666
2667 assert(e);
2668 assert(d);
2669
2670 /* If there's already an event source pending for this priority, don't read another */
2671 if (d->n_pending > 0)
2672 return 0;
2673
2674 while (d->buffer_filled > 0) {
2675 size_t sz;
2676
2677 /* Let's validate that the event structures are complete */
2678 if (d->buffer_filled < offsetof(struct inotify_event, name))
2679 return -EIO;
2680
2681 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2682 if (d->buffer_filled < sz)
2683 return -EIO;
2684
2685 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2686 struct inode_data *inode_data;
2687 Iterator i;
2688
2689 /* The queue overran, let's pass this event to all event sources connected to this inotify
2690 * object */
2691
2692 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2693 sd_event_source *s;
2694
2695 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2696
2697 if (s->enabled == SD_EVENT_OFF)
2698 continue;
2699
2700 r = source_set_pending(s, true);
2701 if (r < 0)
2702 return r;
2703 }
2704 }
2705 } else {
2706 struct inode_data *inode_data;
2707 sd_event_source *s;
2708
2709 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2710 * our watch descriptor table. */
2711 if (d->buffer.ev.mask & IN_IGNORED) {
2712
2713 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2714 if (!inode_data) {
2715 event_inotify_data_drop(e, d, sz);
2716 continue;
2717 }
2718
2719 /* The watch descriptor was removed by the kernel, let's drop it here too */
2720 inode_data->wd = -1;
2721 } else {
2722 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2723 if (!inode_data) {
2724 event_inotify_data_drop(e, d, sz);
2725 continue;
2726 }
2727 }
2728
2729 /* Trigger all event sources that are interested in these events. Also trigger all event
2730 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2731 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2732
2733 if (s->enabled == SD_EVENT_OFF)
2734 continue;
2735
2736 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2737 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2738 continue;
2739
2740 r = source_set_pending(s, true);
2741 if (r < 0)
2742 return r;
2743 }
2744 }
2745
2746 /* Something pending now? If so, let's finish, otherwise let's read more. */
2747 if (d->n_pending > 0)
2748 return 1;
2749 }
2750
2751 return 0;
2752 }
2753
2754 static int process_inotify(sd_event *e) {
2755 struct inotify_data *d;
2756 int r, done = 0;
2757
2758 assert(e);
2759
2760 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2761 r = event_inotify_data_process(e, d);
2762 if (r < 0)
2763 return r;
2764 if (r > 0)
2765 done++;
2766 }
2767
2768 return done;
2769 }
2770
2771 static int source_dispatch(sd_event_source *s) {
2772 EventSourceType saved_type;
2773 int r = 0;
2774
2775 assert(s);
2776 assert(s->pending || s->type == SOURCE_EXIT);
2777
2778 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2779 * the event. */
2780 saved_type = s->type;
2781
2782 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2783 r = source_set_pending(s, false);
2784 if (r < 0)
2785 return r;
2786 }
2787
2788 if (s->type != SOURCE_POST) {
2789 sd_event_source *z;
2790 Iterator i;
2791
2792 /* If we execute a non-post source, let's mark all
2793 * post sources as pending */
2794
2795 SET_FOREACH(z, s->event->post_sources, i) {
2796 if (z->enabled == SD_EVENT_OFF)
2797 continue;
2798
2799 r = source_set_pending(z, true);
2800 if (r < 0)
2801 return r;
2802 }
2803 }
2804
2805 if (s->enabled == SD_EVENT_ONESHOT) {
2806 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2807 if (r < 0)
2808 return r;
2809 }
2810
2811 s->dispatching = true;
2812
2813 switch (s->type) {
2814
2815 case SOURCE_IO:
2816 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2817 break;
2818
2819 case SOURCE_TIME_REALTIME:
2820 case SOURCE_TIME_BOOTTIME:
2821 case SOURCE_TIME_MONOTONIC:
2822 case SOURCE_TIME_REALTIME_ALARM:
2823 case SOURCE_TIME_BOOTTIME_ALARM:
2824 r = s->time.callback(s, s->time.next, s->userdata);
2825 break;
2826
2827 case SOURCE_SIGNAL:
2828 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2829 break;
2830
2831 case SOURCE_CHILD: {
2832 bool zombie;
2833
2834 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2835
2836 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2837
2838 /* Now, reap the PID for good. */
2839 if (zombie)
2840 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2841
2842 break;
2843 }
2844
2845 case SOURCE_DEFER:
2846 r = s->defer.callback(s, s->userdata);
2847 break;
2848
2849 case SOURCE_POST:
2850 r = s->post.callback(s, s->userdata);
2851 break;
2852
2853 case SOURCE_EXIT:
2854 r = s->exit.callback(s, s->userdata);
2855 break;
2856
2857 case SOURCE_INOTIFY: {
2858 struct sd_event *e = s->event;
2859 struct inotify_data *d;
2860 size_t sz;
2861
2862 assert(s->inotify.inode_data);
2863 assert_se(d = s->inotify.inode_data->inotify_data);
2864
2865 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
2866 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2867 assert(d->buffer_filled >= sz);
2868
2869 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
2870
2871 /* When no event is pending anymore on this inotify object, then let's drop the event from the
2872 * buffer. */
2873 if (d->n_pending == 0)
2874 event_inotify_data_drop(e, d, sz);
2875
2876 break;
2877 }
2878
2879 case SOURCE_WATCHDOG:
2880 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2881 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2882 assert_not_reached("Wut? I shouldn't exist.");
2883 }
2884
2885 s->dispatching = false;
2886
2887 if (r < 0)
2888 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2889 strna(s->description), event_source_type_to_string(saved_type));
2890
2891 if (s->n_ref == 0)
2892 source_free(s);
2893 else if (r < 0)
2894 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2895
2896 return 1;
2897 }
2898
2899 static int event_prepare(sd_event *e) {
2900 int r;
2901
2902 assert(e);
2903
2904 for (;;) {
2905 sd_event_source *s;
2906
2907 s = prioq_peek(e->prepare);
2908 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2909 break;
2910
2911 s->prepare_iteration = e->iteration;
2912 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2913 if (r < 0)
2914 return r;
2915
2916 assert(s->prepare);
2917
2918 s->dispatching = true;
2919 r = s->prepare(s, s->userdata);
2920 s->dispatching = false;
2921
2922 if (r < 0)
2923 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2924 strna(s->description), event_source_type_to_string(s->type));
2925
2926 if (s->n_ref == 0)
2927 source_free(s);
2928 else if (r < 0)
2929 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2930 }
2931
2932 return 0;
2933 }
2934
2935 static int dispatch_exit(sd_event *e) {
2936 sd_event_source *p;
2937 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2938 int r;
2939
2940 assert(e);
2941
2942 p = prioq_peek(e->exit);
2943 if (!p || p->enabled == SD_EVENT_OFF) {
2944 e->state = SD_EVENT_FINISHED;
2945 return 0;
2946 }
2947
2948 ref = sd_event_ref(e);
2949 e->iteration++;
2950 e->state = SD_EVENT_EXITING;
2951 r = source_dispatch(p);
2952 e->state = SD_EVENT_INITIAL;
2953 return r;
2954 }
2955
2956 static sd_event_source* event_next_pending(sd_event *e) {
2957 sd_event_source *p;
2958
2959 assert(e);
2960
2961 p = prioq_peek(e->pending);
2962 if (!p)
2963 return NULL;
2964
2965 if (p->enabled == SD_EVENT_OFF)
2966 return NULL;
2967
2968 return p;
2969 }
2970
2971 static int arm_watchdog(sd_event *e) {
2972 struct itimerspec its = {};
2973 usec_t t;
2974 int r;
2975
2976 assert(e);
2977 assert(e->watchdog_fd >= 0);
2978
2979 t = sleep_between(e,
2980 e->watchdog_last + (e->watchdog_period / 2),
2981 e->watchdog_last + (e->watchdog_period * 3 / 4));
2982
2983 timespec_store(&its.it_value, t);
2984
2985 /* Make sure we never set the watchdog to 0, which tells the
2986 * kernel to disable it. */
2987 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2988 its.it_value.tv_nsec = 1;
2989
2990 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2991 if (r < 0)
2992 return -errno;
2993
2994 return 0;
2995 }
2996
2997 static int process_watchdog(sd_event *e) {
2998 assert(e);
2999
3000 if (!e->watchdog)
3001 return 0;
3002
3003 /* Don't notify the watchdog too often */
3004 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3005 return 0;
3006
3007 sd_notify(false, "WATCHDOG=1");
3008 e->watchdog_last = e->timestamp.monotonic;
3009
3010 return arm_watchdog(e);
3011 }
3012
3013 static void event_close_inode_data_fds(sd_event *e) {
3014 struct inode_data *d;
3015
3016 assert(e);
3017
3018 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3019 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3020 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3021 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3022 * compromise. */
3023
3024 while ((d = e->inode_data_to_close)) {
3025 assert(d->fd >= 0);
3026 d->fd = safe_close(d->fd);
3027
3028 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3029 }
3030 }
3031
3032 _public_ int sd_event_prepare(sd_event *e) {
3033 int r;
3034
3035 assert_return(e, -EINVAL);
3036 assert_return(e = event_resolve(e), -ENOPKG);
3037 assert_return(!event_pid_changed(e), -ECHILD);
3038 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3039 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3040
3041 if (e->exit_requested)
3042 goto pending;
3043
3044 e->iteration++;
3045
3046 e->state = SD_EVENT_PREPARING;
3047 r = event_prepare(e);
3048 e->state = SD_EVENT_INITIAL;
3049 if (r < 0)
3050 return r;
3051
3052 r = event_arm_timer(e, &e->realtime);
3053 if (r < 0)
3054 return r;
3055
3056 r = event_arm_timer(e, &e->boottime);
3057 if (r < 0)
3058 return r;
3059
3060 r = event_arm_timer(e, &e->monotonic);
3061 if (r < 0)
3062 return r;
3063
3064 r = event_arm_timer(e, &e->realtime_alarm);
3065 if (r < 0)
3066 return r;
3067
3068 r = event_arm_timer(e, &e->boottime_alarm);
3069 if (r < 0)
3070 return r;
3071
3072 event_close_inode_data_fds(e);
3073
3074 if (event_next_pending(e) || e->need_process_child)
3075 goto pending;
3076
3077 e->state = SD_EVENT_ARMED;
3078
3079 return 0;
3080
3081 pending:
3082 e->state = SD_EVENT_ARMED;
3083 r = sd_event_wait(e, 0);
3084 if (r == 0)
3085 e->state = SD_EVENT_ARMED;
3086
3087 return r;
3088 }
3089
3090 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3091 struct epoll_event *ev_queue;
3092 unsigned ev_queue_max;
3093 int r, m, i;
3094
3095 assert_return(e, -EINVAL);
3096 assert_return(e = event_resolve(e), -ENOPKG);
3097 assert_return(!event_pid_changed(e), -ECHILD);
3098 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3099 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3100
3101 if (e->exit_requested) {
3102 e->state = SD_EVENT_PENDING;
3103 return 1;
3104 }
3105
3106 ev_queue_max = MAX(e->n_sources, 1u);
3107 ev_queue = newa(struct epoll_event, ev_queue_max);
3108
3109 /* If we still have inotify data buffered, then query the other fds, but don't block waiting for them */
3110 if (e->inotify_data_buffered)
3111 timeout = 0;
3112
3113 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3114 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3115 if (m < 0) {
3116 if (errno == EINTR) {
3117 e->state = SD_EVENT_PENDING;
3118 return 1;
3119 }
3120
3121 r = -errno;
3122 goto finish;
3123 }
3124
3125 triple_timestamp_get(&e->timestamp);
3126
3127 for (i = 0; i < m; i++) {
3128
3129 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3130 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3131 else {
3132 WakeupType *t = ev_queue[i].data.ptr;
3133
3134 switch (*t) {
3135
3136 case WAKEUP_EVENT_SOURCE:
3137 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3138 break;
3139
3140 case WAKEUP_CLOCK_DATA: {
3141 struct clock_data *d = ev_queue[i].data.ptr;
3142 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3143 break;
3144 }
3145
3146 case WAKEUP_SIGNAL_DATA:
3147 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3148 break;
3149
3150 case WAKEUP_INOTIFY_DATA:
3151 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3152 break;
3153
3154 default:
3155 assert_not_reached("Invalid wake-up pointer");
3156 }
3157 }
3158 if (r < 0)
3159 goto finish;
3160 }
3161
3162 r = process_watchdog(e);
3163 if (r < 0)
3164 goto finish;
3165
3166 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3167 if (r < 0)
3168 goto finish;
3169
3170 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3171 if (r < 0)
3172 goto finish;
3173
3174 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3175 if (r < 0)
3176 goto finish;
3177
3178 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3179 if (r < 0)
3180 goto finish;
3181
3182 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3183 if (r < 0)
3184 goto finish;
3185
3186 if (e->need_process_child) {
3187 r = process_child(e);
3188 if (r < 0)
3189 goto finish;
3190 }
3191
3192 r = process_inotify(e);
3193 if (r < 0)
3194 goto finish;
3195
3196 if (event_next_pending(e)) {
3197 e->state = SD_EVENT_PENDING;
3198
3199 return 1;
3200 }
3201
3202 r = 0;
3203
3204 finish:
3205 e->state = SD_EVENT_INITIAL;
3206
3207 return r;
3208 }
3209
3210 _public_ int sd_event_dispatch(sd_event *e) {
3211 sd_event_source *p;
3212 int r;
3213
3214 assert_return(e, -EINVAL);
3215 assert_return(e = event_resolve(e), -ENOPKG);
3216 assert_return(!event_pid_changed(e), -ECHILD);
3217 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3218 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3219
3220 if (e->exit_requested)
3221 return dispatch_exit(e);
3222
3223 p = event_next_pending(e);
3224 if (p) {
3225 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3226
3227 ref = sd_event_ref(e);
3228 e->state = SD_EVENT_RUNNING;
3229 r = source_dispatch(p);
3230 e->state = SD_EVENT_INITIAL;
3231 return r;
3232 }
3233
3234 e->state = SD_EVENT_INITIAL;
3235
3236 return 1;
3237 }
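
/* Illustrative sketch (not part of the original source): sd_event_prepare(), sd_event_wait() and
 * sd_event_dispatch() split one sd_event_run() iteration into its three phases, which is how sd-event
 * can be driven from a foreign event loop that polls sd_event_get_fd() itself:
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)                       // nothing pending yet: poll the epoll fd elsewhere, then...
 *             r = sd_event_wait(e, 0);  // ...collect whatever became ready, without blocking
 *     if (r > 0)
 *             r = sd_event_dispatch(e); // dispatch exactly one pending event source
 */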
3238
3239 static void event_log_delays(sd_event *e) {
3240 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3241 unsigned i;
3242 int o;
3243
3244 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3245 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3246 e->delays[i] = 0;
3247 }
3248 log_debug("Event loop iterations: %.*s", o, b);
3249 }
3250
3251 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3252 int r;
3253
3254 assert_return(e, -EINVAL);
3255 assert_return(e = event_resolve(e), -ENOPKG);
3256 assert_return(!event_pid_changed(e), -ECHILD);
3257 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3258 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3259
3260 if (e->profile_delays && e->last_run) {
3261 usec_t this_run;
3262 unsigned l;
3263
3264 this_run = now(CLOCK_MONOTONIC);
3265
3266 l = u64log2(this_run - e->last_run);
3267 assert(l < sizeof(e->delays));
3268 e->delays[l]++;
3269
3270 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3271 event_log_delays(e);
3272 e->last_log = this_run;
3273 }
3274 }
3275
3276 r = sd_event_prepare(e);
3277 if (r == 0)
3278 /* There was nothing? Then wait... */
3279 r = sd_event_wait(e, timeout);
3280
3281 if (e->profile_delays)
3282 e->last_run = now(CLOCK_MONOTONIC);
3283
3284 if (r > 0) {
3285 /* There's something now, so let's dispatch it */
3286 r = sd_event_dispatch(e);
3287 if (r < 0)
3288 return r;
3289
3290 return 1;
3291 }
3292
3293 return r;
3294 }
3295
3296 _public_ int sd_event_loop(sd_event *e) {
3297 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3298 int r;
3299
3300 assert_return(e, -EINVAL);
3301 assert_return(e = event_resolve(e), -ENOPKG);
3302 assert_return(!event_pid_changed(e), -ECHILD);
3303 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3304
3305 ref = sd_event_ref(e);
3306
3307 while (e->state != SD_EVENT_FINISHED) {
3308 r = sd_event_run(e, (uint64_t) -1);
3309 if (r < 0)
3310 return r;
3311 }
3312
3313 return e->exit_code;
3314 }
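
/* Illustrative sketch (not part of the original source): minimal use of the loop API above. The
 * SIGTERM handler and the exit code are made up for the example; the signal has to be blocked
 * before sd_event_add_signal() is called.
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sd_event *e = NULL;
 *     sigset_t mask;
 *     int r;
 *
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &mask, NULL);
 *
 *     r = sd_event_default(&e);
 *     if (r >= 0)
 *             r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *     if (r >= 0)
 *             r = sd_event_loop(e);
 *
 *     sd_event_unref(e);
 */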
3315
3316 _public_ int sd_event_get_fd(sd_event *e) {
3317
3318 assert_return(e, -EINVAL);
3319 assert_return(e = event_resolve(e), -ENOPKG);
3320 assert_return(!event_pid_changed(e), -ECHILD);
3321
3322 return e->epoll_fd;
3323 }
3324
3325 _public_ int sd_event_get_state(sd_event *e) {
3326 assert_return(e, -EINVAL);
3327 assert_return(e = event_resolve(e), -ENOPKG);
3328 assert_return(!event_pid_changed(e), -ECHILD);
3329
3330 return e->state;
3331 }
3332
3333 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3334 assert_return(e, -EINVAL);
3335 assert_return(e = event_resolve(e), -ENOPKG);
3336 assert_return(code, -EINVAL);
3337 assert_return(!event_pid_changed(e), -ECHILD);
3338
3339 if (!e->exit_requested)
3340 return -ENODATA;
3341
3342 *code = e->exit_code;
3343 return 0;
3344 }
3345
3346 _public_ int sd_event_exit(sd_event *e, int code) {
3347 assert_return(e, -EINVAL);
3348 assert_return(e = event_resolve(e), -ENOPKG);
3349 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3350 assert_return(!event_pid_changed(e), -ECHILD);
3351
3352 e->exit_requested = true;
3353 e->exit_code = code;
3354
3355 return 0;
3356 }
3357
3358 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3359 assert_return(e, -EINVAL);
3360 assert_return(e = event_resolve(e), -ENOPKG);
3361 assert_return(usec, -EINVAL);
3362 assert_return(!event_pid_changed(e), -ECHILD);
3363
3364 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3365 return -EOPNOTSUPP;
3366
3367 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3368 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3369 * the purpose of getting the time this doesn't matter. */
3370 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3371 return -EOPNOTSUPP;
3372
3373 if (!triple_timestamp_is_set(&e->timestamp)) {
3374 /* Implicitly fall back to now() if we never ran
3375 * before and thus have no cached time. */
3376 *usec = now(clock);
3377 return 1;
3378 }
3379
3380 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3381 return 0;
3382 }
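
/* Illustrative sketch (not part of the original source): inside a running loop sd_event_now() hands
 * out the timestamp taken when the current batch of events was collected (return value 0), so every
 * source dispatched in the same iteration agrees on "now"; before the first iteration it falls back
 * to a fresh now() and returns 1 instead.
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 */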
3383
3384 _public_ int sd_event_default(sd_event **ret) {
3385 sd_event *e = NULL;
3386 int r;
3387
3388 if (!ret)
3389 return !!default_event;
3390
3391 if (default_event) {
3392 *ret = sd_event_ref(default_event);
3393 return 0;
3394 }
3395
3396 r = sd_event_new(&e);
3397 if (r < 0)
3398 return r;
3399
3400 e->default_event_ptr = &default_event;
3401 e->tid = gettid();
3402 default_event = e;
3403
3404 *ret = e;
3405 return 1;
3406 }
3407
3408 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3409 assert_return(e, -EINVAL);
3410 assert_return(e = event_resolve(e), -ENOPKG);
3411 assert_return(tid, -EINVAL);
3412 assert_return(!event_pid_changed(e), -ECHILD);
3413
3414 if (e->tid != 0) {
3415 *tid = e->tid;
3416 return 0;
3417 }
3418
3419 return -ENXIO;
3420 }
3421
3422 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3423 int r;
3424
3425 assert_return(e, -EINVAL);
3426 assert_return(e = event_resolve(e), -ENOPKG);
3427 assert_return(!event_pid_changed(e), -ECHILD);
3428
3429 if (e->watchdog == !!b)
3430 return e->watchdog;
3431
3432 if (b) {
3433 struct epoll_event ev;
3434
3435 r = sd_watchdog_enabled(false, &e->watchdog_period);
3436 if (r <= 0)
3437 return r;
3438
3439 /* Issue first ping immediately */
3440 sd_notify(false, "WATCHDOG=1");
3441 e->watchdog_last = now(CLOCK_MONOTONIC);
3442
3443 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3444 if (e->watchdog_fd < 0)
3445 return -errno;
3446
3447 r = arm_watchdog(e);
3448 if (r < 0)
3449 goto fail;
3450
3451 ev = (struct epoll_event) {
3452 .events = EPOLLIN,
3453 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3454 };
3455
3456 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3457 if (r < 0) {
3458 r = -errno;
3459 goto fail;
3460 }
3461
3462 } else {
3463 if (e->watchdog_fd >= 0) {
3464 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3465 e->watchdog_fd = safe_close(e->watchdog_fd);
3466 }
3467 }
3468
3469 e->watchdog = !!b;
3470 return e->watchdog;
3471
3472 fail:
3473 e->watchdog_fd = safe_close(e->watchdog_fd);
3474 return r;
3475 }
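
/* Illustrative sketch (not part of the original source): a service that sets WatchdogSec= in its unit
 * file just turns the logic above on once, and the loop keeps pinging the service manager from then on:
 *
 *     r = sd_event_set_watchdog(e, true);
 *     // r > 0:  watchdog requested by the service manager, pings are now sent automatically
 *     // r == 0: no watchdog requested, nothing to do
 *     // r < 0:  error
 */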
3476
3477 _public_ int sd_event_get_watchdog(sd_event *e) {
3478 assert_return(e, -EINVAL);
3479 assert_return(e = event_resolve(e), -ENOPKG);
3480 assert_return(!event_pid_changed(e), -ECHILD);
3481
3482 return e->watchdog;
3483 }
3484
3485 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3486 assert_return(e, -EINVAL);
3487 assert_return(e = event_resolve(e), -ENOPKG);
3488 assert_return(!event_pid_changed(e), -ECHILD);
3489
3490 *ret = e->iteration;
3491 return 0;
3492 }
3493
3494 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3495 assert_return(s, -EINVAL);
3496
3497 s->destroy_callback = callback;
3498 return 0;
3499 }
3500
3501 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3502 assert_return(s, -EINVAL);
3503
3504 if (ret)
3505 *ret = s->destroy_callback;
3506
3507 return !!s->destroy_callback;
3508 }
3509
3510 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3511 assert_return(s, -EINVAL);
3512
3513 return s->floating;
3514 }
3515
3516 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3517 assert_return(s, -EINVAL);
3518
3519 if (s->floating == !!b)
3520 return 0;
3521
3522 if (!s->event) /* Already disconnected */
3523 return -ESTALE;
3524
3525 s->floating = b;
3526
3527 if (b) {
3528 sd_event_source_ref(s);
3529 sd_event_unref(s->event);
3530 } else {
3531 sd_event_ref(s->event);
3532 sd_event_source_unref(s);
3533 }
3534
3535 return 1;
3536 }