src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "event-source.h"
13 #include "fd-util.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "list.h"
17 #include "macro.h"
18 #include "missing.h"
19 #include "prioq.h"
20 #include "process-util.h"
21 #include "set.h"
22 #include "signal-util.h"
23 #include "string-table.h"
24 #include "string-util.h"
25 #include "time-util.h"
26 #include "util.h"
27
28 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
29
30 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
31 [SOURCE_IO] = "io",
32 [SOURCE_TIME_REALTIME] = "realtime",
33         [SOURCE_TIME_BOOTTIME] = "boottime",
34 [SOURCE_TIME_MONOTONIC] = "monotonic",
35 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
36 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
37 [SOURCE_SIGNAL] = "signal",
38 [SOURCE_CHILD] = "child",
39 [SOURCE_DEFER] = "defer",
40 [SOURCE_POST] = "post",
41 [SOURCE_EXIT] = "exit",
42 [SOURCE_WATCHDOG] = "watchdog",
43 [SOURCE_INOTIFY] = "inotify",
44 };
45
46 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
47
48 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
49
50 struct sd_event {
51 unsigned n_ref;
52
53 int epoll_fd;
54 int watchdog_fd;
55
56 Prioq *pending;
57 Prioq *prepare;
58
59 /* timerfd_create() only supports these five clocks so far. We
60 * can add support for more clocks when the kernel learns to
61 * deal with them, too. */
62 struct clock_data realtime;
63 struct clock_data boottime;
64 struct clock_data monotonic;
65 struct clock_data realtime_alarm;
66 struct clock_data boottime_alarm;
67
68 usec_t perturb;
69
70 sd_event_source **signal_sources; /* indexed by signal number */
71 Hashmap *signal_data; /* indexed by priority */
72
73 Hashmap *child_sources;
74 unsigned n_enabled_child_sources;
75
76 Set *post_sources;
77
78 Prioq *exit;
79
80 Hashmap *inotify_data; /* indexed by priority */
81
82 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
83 LIST_HEAD(struct inode_data, inode_data_to_close);
84
85 /* A list of inotify objects that already have events buffered which aren't processed yet */
86 LIST_HEAD(struct inotify_data, inotify_data_buffered);
87
88 pid_t original_pid;
89
90 uint64_t iteration;
91 triple_timestamp timestamp;
92 int state;
93
94 bool exit_requested:1;
95 bool need_process_child:1;
96 bool watchdog:1;
97 bool profile_delays:1;
98
99 int exit_code;
100
101 pid_t tid;
102 sd_event **default_event_ptr;
103
104 usec_t watchdog_last, watchdog_period;
105
106 unsigned n_sources;
107
108 LIST_HEAD(sd_event_source, sources);
109
110 usec_t last_run, last_log;
111 unsigned delays[sizeof(usec_t) * 8];
112 };
113
114 static thread_local sd_event *default_event = NULL;
115
116 static void source_disconnect(sd_event_source *s);
117 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
118
119 static sd_event *event_resolve(sd_event *e) {
120 return e == SD_EVENT_DEFAULT ? default_event : e;
121 }
122
123 static int pending_prioq_compare(const void *a, const void *b) {
124 const sd_event_source *x = a, *y = b;
125 int r;
126
127 assert(x->pending);
128 assert(y->pending);
129
130 /* Enabled ones first */
131 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
132 return -1;
133 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
134 return 1;
135
136 /* Lower priority values first */
137 r = CMP(x->priority, y->priority);
138 if (r != 0)
139 return r;
140
141 /* Older entries first */
142 return CMP(x->pending_iteration, y->pending_iteration);
143 }
144
145 static int prepare_prioq_compare(const void *a, const void *b) {
146 const sd_event_source *x = a, *y = b;
147 int r;
148
149 assert(x->prepare);
150 assert(y->prepare);
151
152 /* Enabled ones first */
153 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
154 return -1;
155 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
156 return 1;
157
158 /* Move most recently prepared ones last, so that we can stop
159 * preparing as soon as we hit one that has already been
160 * prepared in the current iteration */
161 r = CMP(x->prepare_iteration, y->prepare_iteration);
162 if (r != 0)
163 return r;
164
165 /* Lower priority values first */
166 return CMP(x->priority, y->priority);
167 }
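/* Illustration of the ordering above: with the loop at iteration 7 and three sources whose
 * prepare_iteration values are 4, 7 and 7, the prioq yields the iteration-4 entry first. Its prepare
 * callback is run (it has not been prepared this turn yet), and preparation can stop as soon as the
 * first entry already stamped with iteration 7 comes up. */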
168
169 static int earliest_time_prioq_compare(const void *a, const void *b) {
170 const sd_event_source *x = a, *y = b;
171
172 assert(EVENT_SOURCE_IS_TIME(x->type));
173 assert(x->type == y->type);
174
175 /* Enabled ones first */
176 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
177 return -1;
178 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
179 return 1;
180
181 /* Move the pending ones to the end */
182 if (!x->pending && y->pending)
183 return -1;
184 if (x->pending && !y->pending)
185 return 1;
186
187 /* Order by time */
188 return CMP(x->time.next, y->time.next);
189 }
190
191 static usec_t time_event_source_latest(const sd_event_source *s) {
192 return usec_add(s->time.next, s->time.accuracy);
193 }
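/* Worked example of the wakeup window these two prioqs describe: a timer with time.next = 1000000 us
 * and time.accuracy = 250000 us may legitimately fire anywhere in [1.0 s, 1.25 s]. The "earliest"
 * prioq orders sources by time.next, the "latest" prioq by time.next + time.accuracy, so the loop can
 * pick one wakeup instant that satisfies as many timers at once as possible. */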
194
195 static int latest_time_prioq_compare(const void *a, const void *b) {
196 const sd_event_source *x = a, *y = b;
197
198 assert(EVENT_SOURCE_IS_TIME(x->type));
199 assert(x->type == y->type);
200
201 /* Enabled ones first */
202 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203 return -1;
204 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205 return 1;
206
207 /* Move the pending ones to the end */
208 if (!x->pending && y->pending)
209 return -1;
210 if (x->pending && !y->pending)
211 return 1;
212
213 /* Order by time */
214 return CMP(time_event_source_latest(x), time_event_source_latest(y));
215 }
216
217 static int exit_prioq_compare(const void *a, const void *b) {
218 const sd_event_source *x = a, *y = b;
219
220 assert(x->type == SOURCE_EXIT);
221 assert(y->type == SOURCE_EXIT);
222
223 /* Enabled ones first */
224 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
225 return -1;
226 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 return 1;
228
229 /* Lower priority values first */
230 return CMP(x->priority, y->priority);
231 }
232
233 static void free_clock_data(struct clock_data *d) {
234 assert(d);
235 assert(d->wakeup == WAKEUP_CLOCK_DATA);
236
237 safe_close(d->fd);
238 prioq_free(d->earliest);
239 prioq_free(d->latest);
240 }
241
242 static sd_event *event_free(sd_event *e) {
243 sd_event_source *s;
244
245 assert(e);
246
247 while ((s = e->sources)) {
248 assert(s->floating);
249 source_disconnect(s);
250 sd_event_source_unref(s);
251 }
252
253 assert(e->n_sources == 0);
254
255 if (e->default_event_ptr)
256 *(e->default_event_ptr) = NULL;
257
258 safe_close(e->epoll_fd);
259 safe_close(e->watchdog_fd);
260
261 free_clock_data(&e->realtime);
262 free_clock_data(&e->boottime);
263 free_clock_data(&e->monotonic);
264 free_clock_data(&e->realtime_alarm);
265 free_clock_data(&e->boottime_alarm);
266
267 prioq_free(e->pending);
268 prioq_free(e->prepare);
269 prioq_free(e->exit);
270
271 free(e->signal_sources);
272 hashmap_free(e->signal_data);
273
274 hashmap_free(e->inotify_data);
275
276 hashmap_free(e->child_sources);
277 set_free(e->post_sources);
278
279 return mfree(e);
280 }
281
282 _public_ int sd_event_new(sd_event** ret) {
283 sd_event *e;
284 int r;
285
286 assert_return(ret, -EINVAL);
287
288 e = new(sd_event, 1);
289 if (!e)
290 return -ENOMEM;
291
292 *e = (sd_event) {
293 .n_ref = 1,
294 .epoll_fd = -1,
295 .watchdog_fd = -1,
296 .realtime.wakeup = WAKEUP_CLOCK_DATA,
297 .realtime.fd = -1,
298 .realtime.next = USEC_INFINITY,
299 .boottime.wakeup = WAKEUP_CLOCK_DATA,
300 .boottime.fd = -1,
301 .boottime.next = USEC_INFINITY,
302 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
303 .monotonic.fd = -1,
304 .monotonic.next = USEC_INFINITY,
305 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
306 .realtime_alarm.fd = -1,
307 .realtime_alarm.next = USEC_INFINITY,
308 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
309 .boottime_alarm.fd = -1,
310 .boottime_alarm.next = USEC_INFINITY,
311 .perturb = USEC_INFINITY,
312 .original_pid = getpid_cached(),
313 };
314
315 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
316 if (r < 0)
317 goto fail;
318
319 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
320 if (e->epoll_fd < 0) {
321 r = -errno;
322 goto fail;
323 }
324
325 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
326
327 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
328 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
329 e->profile_delays = true;
330 }
331
332 *ret = e;
333 return 0;
334
335 fail:
336 event_free(e);
337 return r;
338 }
339
340 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
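/* A minimal caller sketch for the constructor above (sd_event_loop(), sd_event_exit() and
 * sd_event_unref() are part of the public sd-event.h API; error handling trimmed):
 *
 *     sd_event *e = NULL;
 *     int r = sd_event_new(&e);              // or sd_event_default(&e) for the per-thread instance
 *     if (r < 0)
 *             return r;
 *     // ... add sources here ...
 *     r = sd_event_loop(e);                  // dispatches until sd_event_exit() is called
 *     sd_event_unref(e);
 *
 * Setting SD_EVENT_PROFILE_DELAYS in the environment before the constructor runs enables the latency
 * histogram announced in the log message above. */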
341
342 static bool event_pid_changed(sd_event *e) {
343 assert(e);
344
345 /* We don't support people creating an event loop and keeping
346 * it around over a fork(). Let's complain. */
347
348 return e->original_pid != getpid_cached();
349 }
350
351 static void source_io_unregister(sd_event_source *s) {
352 int r;
353
354 assert(s);
355 assert(s->type == SOURCE_IO);
356
357 if (event_pid_changed(s->event))
358 return;
359
360 if (!s->io.registered)
361 return;
362
363 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
364 if (r < 0)
365 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
366 strna(s->description), event_source_type_to_string(s->type));
367
368 s->io.registered = false;
369 }
370
371 static int source_io_register(
372 sd_event_source *s,
373 int enabled,
374 uint32_t events) {
375
376 struct epoll_event ev;
377 int r;
378
379 assert(s);
380 assert(s->type == SOURCE_IO);
381 assert(enabled != SD_EVENT_OFF);
382
383 ev = (struct epoll_event) {
384 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
385 .data.ptr = s,
386 };
387
388 if (s->io.registered)
389 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
390 else
391 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
392 if (r < 0)
393 return -errno;
394
395 s->io.registered = true;
396
397 return 0;
398 }
399
400 static clockid_t event_source_type_to_clock(EventSourceType t) {
401
402 switch (t) {
403
404 case SOURCE_TIME_REALTIME:
405 return CLOCK_REALTIME;
406
407 case SOURCE_TIME_BOOTTIME:
408 return CLOCK_BOOTTIME;
409
410 case SOURCE_TIME_MONOTONIC:
411 return CLOCK_MONOTONIC;
412
413 case SOURCE_TIME_REALTIME_ALARM:
414 return CLOCK_REALTIME_ALARM;
415
416 case SOURCE_TIME_BOOTTIME_ALARM:
417 return CLOCK_BOOTTIME_ALARM;
418
419 default:
420 return (clockid_t) -1;
421 }
422 }
423
424 static EventSourceType clock_to_event_source_type(clockid_t clock) {
425
426 switch (clock) {
427
428 case CLOCK_REALTIME:
429 return SOURCE_TIME_REALTIME;
430
431 case CLOCK_BOOTTIME:
432 return SOURCE_TIME_BOOTTIME;
433
434 case CLOCK_MONOTONIC:
435 return SOURCE_TIME_MONOTONIC;
436
437 case CLOCK_REALTIME_ALARM:
438 return SOURCE_TIME_REALTIME_ALARM;
439
440 case CLOCK_BOOTTIME_ALARM:
441 return SOURCE_TIME_BOOTTIME_ALARM;
442
443 default:
444 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
445 }
446 }
447
448 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
449 assert(e);
450
451 switch (t) {
452
453 case SOURCE_TIME_REALTIME:
454 return &e->realtime;
455
456 case SOURCE_TIME_BOOTTIME:
457 return &e->boottime;
458
459 case SOURCE_TIME_MONOTONIC:
460 return &e->monotonic;
461
462 case SOURCE_TIME_REALTIME_ALARM:
463 return &e->realtime_alarm;
464
465 case SOURCE_TIME_BOOTTIME_ALARM:
466 return &e->boottime_alarm;
467
468 default:
469 return NULL;
470 }
471 }
472
473 static int event_make_signal_data(
474 sd_event *e,
475 int sig,
476 struct signal_data **ret) {
477
478 struct epoll_event ev;
479 struct signal_data *d;
480 bool added = false;
481 sigset_t ss_copy;
482 int64_t priority;
483 int r;
484
485 assert(e);
486
487 if (event_pid_changed(e))
488 return -ECHILD;
489
490 if (e->signal_sources && e->signal_sources[sig])
491 priority = e->signal_sources[sig]->priority;
492 else
493 priority = SD_EVENT_PRIORITY_NORMAL;
494
495 d = hashmap_get(e->signal_data, &priority);
496 if (d) {
497 if (sigismember(&d->sigset, sig) > 0) {
498 if (ret)
499 *ret = d;
500 return 0;
501 }
502 } else {
503 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
504 if (r < 0)
505 return r;
506
507 d = new(struct signal_data, 1);
508 if (!d)
509 return -ENOMEM;
510
511 *d = (struct signal_data) {
512 .wakeup = WAKEUP_SIGNAL_DATA,
513 .fd = -1,
514 .priority = priority,
515 };
516
517 r = hashmap_put(e->signal_data, &d->priority, d);
518 if (r < 0) {
519 free(d);
520 return r;
521 }
522
523 added = true;
524 }
525
526 ss_copy = d->sigset;
527 assert_se(sigaddset(&ss_copy, sig) >= 0);
528
529 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
530 if (r < 0) {
531 r = -errno;
532 goto fail;
533 }
534
535 d->sigset = ss_copy;
536
537 if (d->fd >= 0) {
538 if (ret)
539 *ret = d;
540 return 0;
541 }
542
543 d->fd = fd_move_above_stdio(r);
544
545 ev = (struct epoll_event) {
546 .events = EPOLLIN,
547 .data.ptr = d,
548 };
549
550 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
551 if (r < 0) {
552 r = -errno;
553 goto fail;
554 }
555
556 if (ret)
557 *ret = d;
558
559 return 0;
560
561 fail:
562 if (added) {
563 d->fd = safe_close(d->fd);
564 hashmap_remove(e->signal_data, &d->priority);
565 free(d);
566 }
567
568 return r;
569 }
570
571 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
572 assert(e);
573 assert(d);
574
575         /* Turns off the specified signal in the signal data
576          * object. If the signal mask of the object becomes empty that
577          * way, the object is removed. */
578
579 if (sigismember(&d->sigset, sig) == 0)
580 return;
581
582 assert_se(sigdelset(&d->sigset, sig) >= 0);
583
584 if (sigisemptyset(&d->sigset)) {
585
586                 /* If the mask is now all-zero we can get rid of the structure */
587 hashmap_remove(e->signal_data, &d->priority);
588 safe_close(d->fd);
589 free(d);
590 return;
591 }
592
593 assert(d->fd >= 0);
594
595 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
596 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
597 }
598
599 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
600 struct signal_data *d;
601 static const int64_t zero_priority = 0;
602
603 assert(e);
604
605 /* Rechecks if the specified signal is still something we are
606 * interested in. If not, we'll unmask it, and possibly drop
607 * the signalfd for it. */
608
609 if (sig == SIGCHLD &&
610 e->n_enabled_child_sources > 0)
611 return;
612
613 if (e->signal_sources &&
614 e->signal_sources[sig] &&
615 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
616 return;
617
618 /*
619 * The specified signal might be enabled in three different queues:
620 *
621 * 1) the one that belongs to the priority passed (if it is non-NULL)
622 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
623 * 3) the 0 priority (to cover the SIGCHLD case)
624 *
625 * Hence, let's remove it from all three here.
626 */
627
628 if (priority) {
629 d = hashmap_get(e->signal_data, priority);
630 if (d)
631 event_unmask_signal_data(e, d, sig);
632 }
633
634 if (e->signal_sources && e->signal_sources[sig]) {
635 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
636 if (d)
637 event_unmask_signal_data(e, d, sig);
638 }
639
640 d = hashmap_get(e->signal_data, &zero_priority);
641 if (d)
642 event_unmask_signal_data(e, d, sig);
643 }
644
645 static void source_disconnect(sd_event_source *s) {
646 sd_event *event;
647
648 assert(s);
649
650 if (!s->event)
651 return;
652
653 assert(s->event->n_sources > 0);
654
655 switch (s->type) {
656
657 case SOURCE_IO:
658 if (s->io.fd >= 0)
659 source_io_unregister(s);
660
661 break;
662
663 case SOURCE_TIME_REALTIME:
664 case SOURCE_TIME_BOOTTIME:
665 case SOURCE_TIME_MONOTONIC:
666 case SOURCE_TIME_REALTIME_ALARM:
667 case SOURCE_TIME_BOOTTIME_ALARM: {
668 struct clock_data *d;
669
670 d = event_get_clock_data(s->event, s->type);
671 assert(d);
672
673 prioq_remove(d->earliest, s, &s->time.earliest_index);
674 prioq_remove(d->latest, s, &s->time.latest_index);
675 d->needs_rearm = true;
676 break;
677 }
678
679 case SOURCE_SIGNAL:
680 if (s->signal.sig > 0) {
681
682 if (s->event->signal_sources)
683 s->event->signal_sources[s->signal.sig] = NULL;
684
685 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
686 }
687
688 break;
689
690 case SOURCE_CHILD:
691 if (s->child.pid > 0) {
692 if (s->enabled != SD_EVENT_OFF) {
693 assert(s->event->n_enabled_child_sources > 0);
694 s->event->n_enabled_child_sources--;
695 }
696
697 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
698 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
699 }
700
701 break;
702
703 case SOURCE_DEFER:
704 /* nothing */
705 break;
706
707 case SOURCE_POST:
708 set_remove(s->event->post_sources, s);
709 break;
710
711 case SOURCE_EXIT:
712 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
713 break;
714
715 case SOURCE_INOTIFY: {
716 struct inode_data *inode_data;
717
718 inode_data = s->inotify.inode_data;
719 if (inode_data) {
720 struct inotify_data *inotify_data;
721 assert_se(inotify_data = inode_data->inotify_data);
722
723 /* Detach this event source from the inode object */
724 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
725 s->inotify.inode_data = NULL;
726
727 if (s->pending) {
728 assert(inotify_data->n_pending > 0);
729 inotify_data->n_pending--;
730 }
731
732                         /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode continues
733                          * to be watched. That's because inotify doesn't really have an API for that: we
734 * can only change watch masks with access to the original inode either by fd or by path. But
735 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
736 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
737 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
738 * there), but given the need for open_by_handle_at() which is privileged and not universally
739 * available this would be quite an incomplete solution. Hence we go the other way, leave the
740 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
741 * anymore after reception. Yes, this sucks, but … Linux … */
742
743 /* Maybe release the inode data (and its inotify) */
744 event_gc_inode_data(s->event, inode_data);
745 }
746
747 break;
748 }
749
750 default:
751 assert_not_reached("Wut? I shouldn't exist.");
752 }
753
754 if (s->pending)
755 prioq_remove(s->event->pending, s, &s->pending_index);
756
757 if (s->prepare)
758 prioq_remove(s->event->prepare, s, &s->prepare_index);
759
760 event = s->event;
761
762 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
763 s->event = NULL;
764 LIST_REMOVE(sources, event->sources, s);
765 event->n_sources--;
766
767 if (!s->floating)
768 sd_event_unref(event);
769 }
770
771 static void source_free(sd_event_source *s) {
772 assert(s);
773
774 source_disconnect(s);
775
776 if (s->type == SOURCE_IO && s->io.owned)
777 s->io.fd = safe_close(s->io.fd);
778
779 if (s->destroy_callback)
780 s->destroy_callback(s->userdata);
781
782 free(s->description);
783 free(s);
784 }
785 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
786
787 static int source_set_pending(sd_event_source *s, bool b) {
788 int r;
789
790 assert(s);
791 assert(s->type != SOURCE_EXIT);
792
793 if (s->pending == b)
794 return 0;
795
796 s->pending = b;
797
798 if (b) {
799 s->pending_iteration = s->event->iteration;
800
801 r = prioq_put(s->event->pending, s, &s->pending_index);
802 if (r < 0) {
803 s->pending = false;
804 return r;
805 }
806 } else
807 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
808
809 if (EVENT_SOURCE_IS_TIME(s->type)) {
810 struct clock_data *d;
811
812 d = event_get_clock_data(s->event, s->type);
813 assert(d);
814
815 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
816 prioq_reshuffle(d->latest, s, &s->time.latest_index);
817 d->needs_rearm = true;
818 }
819
820 if (s->type == SOURCE_SIGNAL && !b) {
821 struct signal_data *d;
822
823 d = hashmap_get(s->event->signal_data, &s->priority);
824 if (d && d->current == s)
825 d->current = NULL;
826 }
827
828 if (s->type == SOURCE_INOTIFY) {
829
830 assert(s->inotify.inode_data);
831 assert(s->inotify.inode_data->inotify_data);
832
833 if (b)
834 s->inotify.inode_data->inotify_data->n_pending ++;
835 else {
836 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
837 s->inotify.inode_data->inotify_data->n_pending --;
838 }
839 }
840
841 return 0;
842 }
843
844 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
845 sd_event_source *s;
846
847 assert(e);
848
849 s = new(sd_event_source, 1);
850 if (!s)
851 return NULL;
852
853 *s = (struct sd_event_source) {
854 .n_ref = 1,
855 .event = e,
856 .floating = floating,
857 .type = type,
858 .pending_index = PRIOQ_IDX_NULL,
859 .prepare_index = PRIOQ_IDX_NULL,
860 };
861
862 if (!floating)
863 sd_event_ref(e);
864
865 LIST_PREPEND(sources, e->sources, s);
866 e->n_sources++;
867
868 return s;
869 }
870
871 _public_ int sd_event_add_io(
872 sd_event *e,
873 sd_event_source **ret,
874 int fd,
875 uint32_t events,
876 sd_event_io_handler_t callback,
877 void *userdata) {
878
879 _cleanup_(source_freep) sd_event_source *s = NULL;
880 int r;
881
882 assert_return(e, -EINVAL);
883 assert_return(e = event_resolve(e), -ENOPKG);
884 assert_return(fd >= 0, -EBADF);
885 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
886 assert_return(callback, -EINVAL);
887 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
888 assert_return(!event_pid_changed(e), -ECHILD);
889
890 s = source_new(e, !ret, SOURCE_IO);
891 if (!s)
892 return -ENOMEM;
893
894 s->wakeup = WAKEUP_EVENT_SOURCE;
895 s->io.fd = fd;
896 s->io.events = events;
897 s->io.callback = callback;
898 s->userdata = userdata;
899 s->enabled = SD_EVENT_ON;
900
901 r = source_io_register(s, s->enabled, events);
902 if (r < 0)
903 return r;
904
905 if (ret)
906 *ret = s;
907 TAKE_PTR(s);
908
909 return 0;
910 }
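/* A minimal caller sketch for sd_event_add_io() (fd is assumed to come from e.g. socket(2); error
 * handling trimmed):
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     // read from fd ...
 *             }
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(event, &source, fd, EPOLLIN, on_io, NULL);
 *
 * Passing ret == NULL makes the source "floating", i.e. owned by the event loop itself. */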
911
912 static void initialize_perturb(sd_event *e) {
913 sd_id128_t bootid = {};
914
915 /* When we sleep for longer, we try to realign the wakeup to
916 the same time within each minute/second/250ms, so that
917 events all across the system can be coalesced into a single
918 CPU wakeup. However, let's take some system-specific
919 randomness for this value, so that in a network of systems
920 with synced clocks timer events are distributed a
921 bit. Here, we calculate a perturbation usec offset from the
922 boot ID. */
923
924 if (_likely_(e->perturb != USEC_INFINITY))
925 return;
926
927 if (sd_id128_get_boot(&bootid) >= 0)
928 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
929 }
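/* Worked example of the perturbation above: the XOR of the two 64-bit halves of the boot ID, taken
 * modulo USEC_PER_MINUTE, yields a boot-specific constant in [0, 60 s), say roughly 17 s on one
 * machine. Timers that are allowed a full minute of slack are then steered towards second ~17 of each
 * minute on that machine, while a machine with a different boot ID picks a different offset, so fleets
 * with synchronized clocks do not all wake up at the exact same instant. */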
930
931 static int event_setup_timer_fd(
932 sd_event *e,
933 struct clock_data *d,
934 clockid_t clock) {
935
936 struct epoll_event ev;
937 int r, fd;
938
939 assert(e);
940 assert(d);
941
942 if (_likely_(d->fd >= 0))
943 return 0;
944
945 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
946 if (fd < 0)
947 return -errno;
948
949 fd = fd_move_above_stdio(fd);
950
951 ev = (struct epoll_event) {
952 .events = EPOLLIN,
953 .data.ptr = d,
954 };
955
956 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
957 if (r < 0) {
958 safe_close(fd);
959 return -errno;
960 }
961
962 d->fd = fd;
963 return 0;
964 }
965
966 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
967 assert(s);
968
969 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
970 }
971
972 _public_ int sd_event_add_time(
973 sd_event *e,
974 sd_event_source **ret,
975 clockid_t clock,
976 uint64_t usec,
977 uint64_t accuracy,
978 sd_event_time_handler_t callback,
979 void *userdata) {
980
981 EventSourceType type;
982 _cleanup_(source_freep) sd_event_source *s = NULL;
983 struct clock_data *d;
984 int r;
985
986 assert_return(e, -EINVAL);
987 assert_return(e = event_resolve(e), -ENOPKG);
988 assert_return(accuracy != (uint64_t) -1, -EINVAL);
989 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
990 assert_return(!event_pid_changed(e), -ECHILD);
991
992 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
993 return -EOPNOTSUPP;
994
995 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
996 if (type < 0)
997 return -EOPNOTSUPP;
998
999 if (!callback)
1000 callback = time_exit_callback;
1001
1002 d = event_get_clock_data(e, type);
1003 assert(d);
1004
1005 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1006 if (r < 0)
1007 return r;
1008
1009 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1010 if (r < 0)
1011 return r;
1012
1013 if (d->fd < 0) {
1014 r = event_setup_timer_fd(e, d, clock);
1015 if (r < 0)
1016 return r;
1017 }
1018
1019 s = source_new(e, !ret, type);
1020 if (!s)
1021 return -ENOMEM;
1022
1023 s->time.next = usec;
1024 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1025 s->time.callback = callback;
1026 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1027 s->userdata = userdata;
1028 s->enabled = SD_EVENT_ONESHOT;
1029
1030 d->needs_rearm = true;
1031
1032 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1033 if (r < 0)
1034 return r;
1035
1036 r = prioq_put(d->latest, s, &s->time.latest_index);
1037 if (r < 0)
1038 return r;
1039
1040 if (ret)
1041 *ret = s;
1042 TAKE_PTR(s);
1043
1044 return 0;
1045 }
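/* A minimal caller sketch for sd_event_add_time() (error handling trimmed; uses sd_event_now() from
 * the public sd-event.h API to obtain the loop's notion of the current time):
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return 0;   // one-shot by default; re-arm with sd_event_source_set_time() if desired
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(event, CLOCK_MONOTONIC, &now_usec);
 *     r = sd_event_add_time(event, &source, CLOCK_MONOTONIC, now_usec + 5 * USEC_PER_SEC,
 *                           0, on_timer, NULL);   // accuracy 0 selects DEFAULT_ACCURACY_USEC (250 ms)
 */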
1046
1047 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1048 assert(s);
1049
1050 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1051 }
1052
1053 _public_ int sd_event_add_signal(
1054 sd_event *e,
1055 sd_event_source **ret,
1056 int sig,
1057 sd_event_signal_handler_t callback,
1058 void *userdata) {
1059
1060 _cleanup_(source_freep) sd_event_source *s = NULL;
1061 struct signal_data *d;
1062 sigset_t ss;
1063 int r;
1064
1065 assert_return(e, -EINVAL);
1066 assert_return(e = event_resolve(e), -ENOPKG);
1067 assert_return(SIGNAL_VALID(sig), -EINVAL);
1068 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1069 assert_return(!event_pid_changed(e), -ECHILD);
1070
1071 if (!callback)
1072 callback = signal_exit_callback;
1073
1074 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1075 if (r != 0)
1076 return -r;
1077
1078 if (!sigismember(&ss, sig))
1079 return -EBUSY;
1080
1081 if (!e->signal_sources) {
1082 e->signal_sources = new0(sd_event_source*, _NSIG);
1083 if (!e->signal_sources)
1084 return -ENOMEM;
1085 } else if (e->signal_sources[sig])
1086 return -EBUSY;
1087
1088 s = source_new(e, !ret, SOURCE_SIGNAL);
1089 if (!s)
1090 return -ENOMEM;
1091
1092 s->signal.sig = sig;
1093 s->signal.callback = callback;
1094 s->userdata = userdata;
1095 s->enabled = SD_EVENT_ON;
1096
1097 e->signal_sources[sig] = s;
1098
1099 r = event_make_signal_data(e, sig, &d);
1100 if (r < 0)
1101 return r;
1102
1103 /* Use the signal name as description for the event source by default */
1104 (void) sd_event_source_set_description(s, signal_to_string(sig));
1105
1106 if (ret)
1107 *ret = s;
1108 TAKE_PTR(s);
1109
1110 return 0;
1111 }
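/* A minimal caller sketch for sd_event_add_signal(): the signal must already be blocked in the calling
 * thread, otherwise the check above returns -EBUSY (error handling trimmed):
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     pthread_sigmask(SIG_BLOCK, &ss, NULL);
 *     r = sd_event_add_signal(event, NULL, SIGTERM, on_sigterm, NULL);
 *
 * Passing a NULL callback falls back to signal_exit_callback above, i.e. receipt of the signal simply
 * requests termination of the loop. */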
1112
1113 _public_ int sd_event_add_child(
1114 sd_event *e,
1115 sd_event_source **ret,
1116 pid_t pid,
1117 int options,
1118 sd_event_child_handler_t callback,
1119 void *userdata) {
1120
1121 _cleanup_(source_freep) sd_event_source *s = NULL;
1122 int r;
1123
1124 assert_return(e, -EINVAL);
1125 assert_return(e = event_resolve(e), -ENOPKG);
1126 assert_return(pid > 1, -EINVAL);
1127 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1128 assert_return(options != 0, -EINVAL);
1129 assert_return(callback, -EINVAL);
1130 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1131 assert_return(!event_pid_changed(e), -ECHILD);
1132
1133 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1134 if (r < 0)
1135 return r;
1136
1137 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1138 return -EBUSY;
1139
1140 s = source_new(e, !ret, SOURCE_CHILD);
1141 if (!s)
1142 return -ENOMEM;
1143
1144 s->child.pid = pid;
1145 s->child.options = options;
1146 s->child.callback = callback;
1147 s->userdata = userdata;
1148 s->enabled = SD_EVENT_ONESHOT;
1149
1150 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1151 if (r < 0)
1152 return r;
1153
1154 e->n_enabled_child_sources++;
1155
1156 r = event_make_signal_data(e, SIGCHLD, NULL);
1157 if (r < 0) {
1158 e->n_enabled_child_sources--;
1159 return r;
1160 }
1161
1162 e->need_process_child = true;
1163
1164 if (ret)
1165 *ret = s;
1166 TAKE_PTR(s);
1167
1168 return 0;
1169 }
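/* A minimal caller sketch for sd_event_add_child() (as with signal sources, SIGCHLD is expected to be
 * blocked in all threads before this is called; error handling trimmed):
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_info("child %i exited with status %i", si->si_pid, si->si_status);
 *             return 0;
 *     }
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *     r = sd_event_add_child(event, NULL, pid, WEXITED, on_child, NULL);
 *
 * Only one child source may exist per PID, hence the -EBUSY check above. */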
1170
1171 _public_ int sd_event_add_defer(
1172 sd_event *e,
1173 sd_event_source **ret,
1174 sd_event_handler_t callback,
1175 void *userdata) {
1176
1177 _cleanup_(source_freep) sd_event_source *s = NULL;
1178 int r;
1179
1180 assert_return(e, -EINVAL);
1181 assert_return(e = event_resolve(e), -ENOPKG);
1182 assert_return(callback, -EINVAL);
1183 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1184 assert_return(!event_pid_changed(e), -ECHILD);
1185
1186 s = source_new(e, !ret, SOURCE_DEFER);
1187 if (!s)
1188 return -ENOMEM;
1189
1190 s->defer.callback = callback;
1191 s->userdata = userdata;
1192 s->enabled = SD_EVENT_ONESHOT;
1193
1194 r = source_set_pending(s, true);
1195 if (r < 0)
1196 return r;
1197
1198 if (ret)
1199 *ret = s;
1200 TAKE_PTR(s);
1201
1202 return 0;
1203 }
1204
1205 _public_ int sd_event_add_post(
1206 sd_event *e,
1207 sd_event_source **ret,
1208 sd_event_handler_t callback,
1209 void *userdata) {
1210
1211 _cleanup_(source_freep) sd_event_source *s = NULL;
1212 int r;
1213
1214 assert_return(e, -EINVAL);
1215 assert_return(e = event_resolve(e), -ENOPKG);
1216 assert_return(callback, -EINVAL);
1217 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1218 assert_return(!event_pid_changed(e), -ECHILD);
1219
1220 r = set_ensure_allocated(&e->post_sources, NULL);
1221 if (r < 0)
1222 return r;
1223
1224 s = source_new(e, !ret, SOURCE_POST);
1225 if (!s)
1226 return -ENOMEM;
1227
1228 s->post.callback = callback;
1229 s->userdata = userdata;
1230 s->enabled = SD_EVENT_ON;
1231
1232 r = set_put(e->post_sources, s);
1233 if (r < 0)
1234 return r;
1235
1236 if (ret)
1237 *ret = s;
1238 TAKE_PTR(s);
1239
1240 return 0;
1241 }
1242
1243 _public_ int sd_event_add_exit(
1244 sd_event *e,
1245 sd_event_source **ret,
1246 sd_event_handler_t callback,
1247 void *userdata) {
1248
1249 _cleanup_(source_freep) sd_event_source *s = NULL;
1250 int r;
1251
1252 assert_return(e, -EINVAL);
1253 assert_return(e = event_resolve(e), -ENOPKG);
1254 assert_return(callback, -EINVAL);
1255 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1256 assert_return(!event_pid_changed(e), -ECHILD);
1257
1258 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1259 if (r < 0)
1260 return r;
1261
1262 s = source_new(e, !ret, SOURCE_EXIT);
1263 if (!s)
1264 return -ENOMEM;
1265
1266 s->exit.callback = callback;
1267 s->userdata = userdata;
1268 s->exit.prioq_index = PRIOQ_IDX_NULL;
1269 s->enabled = SD_EVENT_ONESHOT;
1270
1271 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1272 if (r < 0)
1273 return r;
1274
1275 if (ret)
1276 *ret = s;
1277 TAKE_PTR(s);
1278
1279 return 0;
1280 }
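/* The three source types added above differ mainly in when they are dispatched: defer sources are
 * marked pending immediately (source_set_pending(s, true) above) and, being SD_EVENT_ONESHOT by
 * default, run once on the next loop iteration; post sources (SD_EVENT_ON) run after the other,
 * non-post sources of an iteration have been dispatched; exit sources sit in their own priority queue
 * and only run once sd_event_exit() has been called, e.g.:
 *
 *     r = sd_event_add_exit(event, NULL, on_cleanup, NULL);   // on_cleanup uses the plain
 *                                                             // sd_event_handler_t signature
 */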
1281
1282 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1283 assert(e);
1284
1285 if (!d)
1286 return;
1287
1288 assert(hashmap_isempty(d->inodes));
1289 assert(hashmap_isempty(d->wd));
1290
1291 if (d->buffer_filled > 0)
1292 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1293
1294 hashmap_free(d->inodes);
1295 hashmap_free(d->wd);
1296
1297 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1298
1299 if (d->fd >= 0) {
1300 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1301 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1302
1303 safe_close(d->fd);
1304 }
1305 free(d);
1306 }
1307
1308 static int event_make_inotify_data(
1309 sd_event *e,
1310 int64_t priority,
1311 struct inotify_data **ret) {
1312
1313 _cleanup_close_ int fd = -1;
1314 struct inotify_data *d;
1315 struct epoll_event ev;
1316 int r;
1317
1318 assert(e);
1319
1320 d = hashmap_get(e->inotify_data, &priority);
1321 if (d) {
1322 if (ret)
1323 *ret = d;
1324 return 0;
1325 }
1326
1327         fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1328 if (fd < 0)
1329 return -errno;
1330
1331 fd = fd_move_above_stdio(fd);
1332
1333 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1334 if (r < 0)
1335 return r;
1336
1337 d = new(struct inotify_data, 1);
1338 if (!d)
1339 return -ENOMEM;
1340
1341 *d = (struct inotify_data) {
1342 .wakeup = WAKEUP_INOTIFY_DATA,
1343 .fd = TAKE_FD(fd),
1344 .priority = priority,
1345 };
1346
1347 r = hashmap_put(e->inotify_data, &d->priority, d);
1348 if (r < 0) {
1349 d->fd = safe_close(d->fd);
1350 free(d);
1351 return r;
1352 }
1353
1354 ev = (struct epoll_event) {
1355 .events = EPOLLIN,
1356 .data.ptr = d,
1357 };
1358
1359 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1360 r = -errno;
1361 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1362 * remove the fd from the epoll first, which we don't want as we couldn't
1363 * add it in the first place. */
1364 event_free_inotify_data(e, d);
1365 return r;
1366 }
1367
1368 if (ret)
1369 *ret = d;
1370
1371 return 1;
1372 }
1373
1374 static int inode_data_compare(const void *a, const void *b) {
1375 const struct inode_data *x = a, *y = b;
1376 int r;
1377
1378 assert(x);
1379 assert(y);
1380
1381 r = CMP(x->dev, y->dev);
1382 if (r != 0)
1383 return r;
1384
1385 return CMP(x->ino, y->ino);
1386 }
1387
1388 static void inode_data_hash_func(const void *p, struct siphash *state) {
1389 const struct inode_data *d = p;
1390
1391 assert(p);
1392
1393 siphash24_compress(&d->dev, sizeof(d->dev), state);
1394 siphash24_compress(&d->ino, sizeof(d->ino), state);
1395 }
1396
1397 const struct hash_ops inode_data_hash_ops = {
1398 .hash = inode_data_hash_func,
1399 .compare = inode_data_compare
1400 };
1401
1402 static void event_free_inode_data(
1403 sd_event *e,
1404 struct inode_data *d) {
1405
1406 assert(e);
1407
1408 if (!d)
1409 return;
1410
1411 assert(!d->event_sources);
1412
1413 if (d->fd >= 0) {
1414 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1415 safe_close(d->fd);
1416 }
1417
1418 if (d->inotify_data) {
1419
1420 if (d->wd >= 0) {
1421 if (d->inotify_data->fd >= 0) {
1422 /* So here's a problem. At the time this runs the watch descriptor might already be
1423 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1424 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1425 * likely case to happen. */
1426
1427 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1428 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1429 }
1430
1431 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1432 }
1433
1434 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1435 }
1436
1437 free(d);
1438 }
1439
1440 static void event_gc_inode_data(
1441 sd_event *e,
1442 struct inode_data *d) {
1443
1444 struct inotify_data *inotify_data;
1445
1446 assert(e);
1447
1448 if (!d)
1449 return;
1450
1451 if (d->event_sources)
1452 return;
1453
1454 inotify_data = d->inotify_data;
1455 event_free_inode_data(e, d);
1456
1457 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1458 event_free_inotify_data(e, inotify_data);
1459 }
1460
1461 static int event_make_inode_data(
1462 sd_event *e,
1463 struct inotify_data *inotify_data,
1464 dev_t dev,
1465 ino_t ino,
1466 struct inode_data **ret) {
1467
1468 struct inode_data *d, key;
1469 int r;
1470
1471 assert(e);
1472 assert(inotify_data);
1473
1474 key = (struct inode_data) {
1475 .ino = ino,
1476 .dev = dev,
1477 };
1478
1479 d = hashmap_get(inotify_data->inodes, &key);
1480 if (d) {
1481 if (ret)
1482 *ret = d;
1483
1484 return 0;
1485 }
1486
1487 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1488 if (r < 0)
1489 return r;
1490
1491 d = new(struct inode_data, 1);
1492 if (!d)
1493 return -ENOMEM;
1494
1495 *d = (struct inode_data) {
1496 .dev = dev,
1497 .ino = ino,
1498 .wd = -1,
1499 .fd = -1,
1500 .inotify_data = inotify_data,
1501 };
1502
1503 r = hashmap_put(inotify_data->inodes, d, d);
1504 if (r < 0) {
1505 free(d);
1506 return r;
1507 }
1508
1509 if (ret)
1510 *ret = d;
1511
1512 return 1;
1513 }
1514
1515 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1516 bool excl_unlink = true;
1517 uint32_t combined = 0;
1518 sd_event_source *s;
1519
1520 assert(d);
1521
1522 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1523 * the IN_EXCL_UNLINK flag is ANDed instead.
1524 *
1525 * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
1526 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1527 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1528 * events we don't care for client-side. */
1529
1530 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1531
1532 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1533 excl_unlink = false;
1534
1535 combined |= s->inotify.mask;
1536 }
1537
1538 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1539 }
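/* Worked example of the mask combination above: if one source on an inode asks for
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and a second asks for IN_MOVED_TO, the watch is realized with
 * IN_CLOSE_WRITE|IN_MOVED_TO: IN_EXCL_UNLINK is dropped because not every source requested it, and
 * per-watch flags such as IN_ONESHOT, IN_DONT_FOLLOW and IN_ONLYDIR are never propagated to the
 * kernel watch. */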
1540
1541 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1542 uint32_t combined_mask;
1543 int wd, r;
1544
1545 assert(d);
1546 assert(d->fd >= 0);
1547
1548 combined_mask = inode_data_determine_mask(d);
1549
1550 if (d->wd >= 0 && combined_mask == d->combined_mask)
1551 return 0;
1552
1553 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1554 if (r < 0)
1555 return r;
1556
1557 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1558 if (wd < 0)
1559 return -errno;
1560
1561 if (d->wd < 0) {
1562 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1563 if (r < 0) {
1564 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1565 return r;
1566 }
1567
1568 d->wd = wd;
1569
1570 } else if (d->wd != wd) {
1571
1572 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1573                 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1574 return -EINVAL;
1575 }
1576
1577 d->combined_mask = combined_mask;
1578 return 1;
1579 }
1580
1581 _public_ int sd_event_add_inotify(
1582 sd_event *e,
1583 sd_event_source **ret,
1584 const char *path,
1585 uint32_t mask,
1586 sd_event_inotify_handler_t callback,
1587 void *userdata) {
1588
1589 struct inotify_data *inotify_data = NULL;
1590 struct inode_data *inode_data = NULL;
1591 _cleanup_close_ int fd = -1;
1592 _cleanup_(source_freep) sd_event_source *s = NULL;
1593 struct stat st;
1594 int r;
1595
1596 assert_return(e, -EINVAL);
1597 assert_return(e = event_resolve(e), -ENOPKG);
1598 assert_return(path, -EINVAL);
1599 assert_return(callback, -EINVAL);
1600 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1601 assert_return(!event_pid_changed(e), -ECHILD);
1602
1603 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1604 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1605 * the user can't use them for us. */
1606 if (mask & IN_MASK_ADD)
1607 return -EINVAL;
1608
1609 fd = open(path, O_PATH|O_CLOEXEC|
1610 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1611 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1612 if (fd < 0)
1613 return -errno;
1614
1615 if (fstat(fd, &st) < 0)
1616 return -errno;
1617
1618 s = source_new(e, !ret, SOURCE_INOTIFY);
1619 if (!s)
1620 return -ENOMEM;
1621
1622 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1623 s->inotify.mask = mask;
1624 s->inotify.callback = callback;
1625 s->userdata = userdata;
1626
1627 /* Allocate an inotify object for this priority, and an inode object within it */
1628 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1629 if (r < 0)
1630 return r;
1631
1632 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1633 if (r < 0) {
1634 event_free_inotify_data(e, inotify_data);
1635 return r;
1636 }
1637
1638         /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1639          * the event source until then; for that we need the original inode. */
1640 if (inode_data->fd < 0) {
1641 inode_data->fd = TAKE_FD(fd);
1642 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1643 }
1644
1645 /* Link our event source to the inode data object */
1646 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1647 s->inotify.inode_data = inode_data;
1648
1649 /* Actually realize the watch now */
1650 r = inode_data_realize_watch(e, inode_data);
1651 if (r < 0)
1652 return r;
1653
1654 (void) sd_event_source_set_description(s, path);
1655
1656 if (ret)
1657 *ret = s;
1658 TAKE_PTR(s);
1659
1660 return 0;
1661 }
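/* A minimal caller sketch for sd_event_add_inotify() (error handling trimmed):
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify event 0x%x on %s", ev->mask, ev->len > 0 ? ev->name : "(inode)");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(event, NULL, "/etc/hostname", IN_CLOSE_WRITE|IN_MOVED_TO,
 *                              on_inotify, NULL);
 *
 * Watches on the same inode are coalesced per priority (see inode_data_realize_watch() above), which
 * is why IN_MASK_ADD is rejected here. */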
1662
1663 static sd_event_source* event_source_free(sd_event_source *s) {
1664 if (!s)
1665 return NULL;
1666
1667 /* Here's a special hack: when we are called from a
1668 * dispatch handler we won't free the event source
1669 * immediately, but we will detach the fd from the
1670 * epoll. This way it is safe for the caller to unref
1671 * the event source and immediately close the fd, but
1672 * we still retain a valid event source object after
1673 * the callback. */
1674
1675 if (s->dispatching) {
1676 if (s->type == SOURCE_IO)
1677 source_io_unregister(s);
1678
1679 source_disconnect(s);
1680 } else
1681 source_free(s);
1682
1683 return NULL;
1684 }
1685
1686 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1687
1688 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1689 assert_return(s, -EINVAL);
1690 assert_return(!event_pid_changed(s->event), -ECHILD);
1691
1692 return free_and_strdup(&s->description, description);
1693 }
1694
1695 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1696 assert_return(s, -EINVAL);
1697 assert_return(description, -EINVAL);
1698 assert_return(!event_pid_changed(s->event), -ECHILD);
1699
1700 if (!s->description)
1701 return -ENXIO;
1702
1703 *description = s->description;
1704 return 0;
1705 }
1706
1707 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1708 assert_return(s, NULL);
1709
1710 return s->event;
1711 }
1712
1713 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1714 assert_return(s, -EINVAL);
1715 assert_return(s->type != SOURCE_EXIT, -EDOM);
1716 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1717 assert_return(!event_pid_changed(s->event), -ECHILD);
1718
1719 return s->pending;
1720 }
1721
1722 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1723 assert_return(s, -EINVAL);
1724 assert_return(s->type == SOURCE_IO, -EDOM);
1725 assert_return(!event_pid_changed(s->event), -ECHILD);
1726
1727 return s->io.fd;
1728 }
1729
1730 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1731 int r;
1732
1733 assert_return(s, -EINVAL);
1734 assert_return(fd >= 0, -EBADF);
1735 assert_return(s->type == SOURCE_IO, -EDOM);
1736 assert_return(!event_pid_changed(s->event), -ECHILD);
1737
1738 if (s->io.fd == fd)
1739 return 0;
1740
1741 if (s->enabled == SD_EVENT_OFF) {
1742 s->io.fd = fd;
1743 s->io.registered = false;
1744 } else {
1745 int saved_fd;
1746
1747 saved_fd = s->io.fd;
1748 assert(s->io.registered);
1749
1750 s->io.fd = fd;
1751 s->io.registered = false;
1752
1753 r = source_io_register(s, s->enabled, s->io.events);
1754 if (r < 0) {
1755 s->io.fd = saved_fd;
1756 s->io.registered = true;
1757 return r;
1758 }
1759
1760 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1761 }
1762
1763 return 0;
1764 }
1765
1766 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1767 assert_return(s, -EINVAL);
1768 assert_return(s->type == SOURCE_IO, -EDOM);
1769
1770 return s->io.owned;
1771 }
1772
1773 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1774 assert_return(s, -EINVAL);
1775 assert_return(s->type == SOURCE_IO, -EDOM);
1776
1777 s->io.owned = own;
1778 return 0;
1779 }
1780
1781 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1782 assert_return(s, -EINVAL);
1783 assert_return(events, -EINVAL);
1784 assert_return(s->type == SOURCE_IO, -EDOM);
1785 assert_return(!event_pid_changed(s->event), -ECHILD);
1786
1787 *events = s->io.events;
1788 return 0;
1789 }
1790
1791 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1792 int r;
1793
1794 assert_return(s, -EINVAL);
1795 assert_return(s->type == SOURCE_IO, -EDOM);
1796 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1797 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1798 assert_return(!event_pid_changed(s->event), -ECHILD);
1799
1800 /* edge-triggered updates are never skipped, so we can reset edges */
1801 if (s->io.events == events && !(events & EPOLLET))
1802 return 0;
1803
1804 r = source_set_pending(s, false);
1805 if (r < 0)
1806 return r;
1807
1808 if (s->enabled != SD_EVENT_OFF) {
1809 r = source_io_register(s, s->enabled, events);
1810 if (r < 0)
1811 return r;
1812 }
1813
1814 s->io.events = events;
1815
1816 return 0;
1817 }
1818
1819 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1820 assert_return(s, -EINVAL);
1821 assert_return(revents, -EINVAL);
1822 assert_return(s->type == SOURCE_IO, -EDOM);
1823 assert_return(s->pending, -ENODATA);
1824 assert_return(!event_pid_changed(s->event), -ECHILD);
1825
1826 *revents = s->io.revents;
1827 return 0;
1828 }
1829
1830 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1831 assert_return(s, -EINVAL);
1832 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1833 assert_return(!event_pid_changed(s->event), -ECHILD);
1834
1835 return s->signal.sig;
1836 }
1837
1838 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1839 assert_return(s, -EINVAL);
1840 assert_return(!event_pid_changed(s->event), -ECHILD);
1841
1842 *priority = s->priority;
1843 return 0;
1844 }
1845
1846 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1847 bool rm_inotify = false, rm_inode = false;
1848 struct inotify_data *new_inotify_data = NULL;
1849 struct inode_data *new_inode_data = NULL;
1850 int r;
1851
1852 assert_return(s, -EINVAL);
1853 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1854 assert_return(!event_pid_changed(s->event), -ECHILD);
1855
1856 if (s->priority == priority)
1857 return 0;
1858
1859 if (s->type == SOURCE_INOTIFY) {
1860 struct inode_data *old_inode_data;
1861
1862 assert(s->inotify.inode_data);
1863 old_inode_data = s->inotify.inode_data;
1864
1865                 /* We need the original fd to change the priority. If we don't have it we can't change the priority
1866                  * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1867 * events we allow priority changes only until the first following iteration. */
1868 if (old_inode_data->fd < 0)
1869 return -EOPNOTSUPP;
1870
1871 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1872 if (r < 0)
1873 return r;
1874 rm_inotify = r > 0;
1875
1876 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1877 if (r < 0)
1878 goto fail;
1879 rm_inode = r > 0;
1880
1881 if (new_inode_data->fd < 0) {
1882 /* Duplicate the fd for the new inode object if we don't have any yet */
1883 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1884 if (new_inode_data->fd < 0) {
1885 r = -errno;
1886 goto fail;
1887 }
1888
1889 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1890 }
1891
1892 /* Move the event source to the new inode data structure */
1893 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1894 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1895 s->inotify.inode_data = new_inode_data;
1896
1897 /* Now create the new watch */
1898 r = inode_data_realize_watch(s->event, new_inode_data);
1899 if (r < 0) {
1900 /* Move it back */
1901 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1902 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1903 s->inotify.inode_data = old_inode_data;
1904 goto fail;
1905 }
1906
1907 s->priority = priority;
1908
1909 event_gc_inode_data(s->event, old_inode_data);
1910
1911 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1912 struct signal_data *old, *d;
1913
1914 /* Move us from the signalfd belonging to the old
1915 * priority to the signalfd of the new priority */
1916
1917 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1918
1919 s->priority = priority;
1920
1921 r = event_make_signal_data(s->event, s->signal.sig, &d);
1922 if (r < 0) {
1923 s->priority = old->priority;
1924 return r;
1925 }
1926
1927 event_unmask_signal_data(s->event, old, s->signal.sig);
1928 } else
1929 s->priority = priority;
1930
1931 if (s->pending)
1932 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1933
1934 if (s->prepare)
1935 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1936
1937 if (s->type == SOURCE_EXIT)
1938 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1939
1940 return 0;
1941
1942 fail:
1943 if (rm_inode)
1944 event_free_inode_data(s->event, new_inode_data);
1945
1946 if (rm_inotify)
1947 event_free_inotify_data(s->event, new_inotify_data);
1948
1949 return r;
1950 }
1951
1952 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1953 assert_return(s, -EINVAL);
1954 assert_return(!event_pid_changed(s->event), -ECHILD);
1955
1956 if (m)
1957 *m = s->enabled;
1958 return s->enabled != SD_EVENT_OFF;
1959 }
1960
1961 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1962 int r;
1963
1964 assert_return(s, -EINVAL);
1965 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1966 assert_return(!event_pid_changed(s->event), -ECHILD);
1967
1968 /* If we are dead anyway, we are fine with turning off
1969 * sources, but everything else needs to fail. */
1970 if (s->event->state == SD_EVENT_FINISHED)
1971 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1972
1973 if (s->enabled == m)
1974 return 0;
1975
1976 if (m == SD_EVENT_OFF) {
1977
1978 /* Unset the pending flag when this event source is disabled */
1979 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1980 r = source_set_pending(s, false);
1981 if (r < 0)
1982 return r;
1983 }
1984
1985 switch (s->type) {
1986
1987 case SOURCE_IO:
1988 source_io_unregister(s);
1989 s->enabled = m;
1990 break;
1991
1992 case SOURCE_TIME_REALTIME:
1993 case SOURCE_TIME_BOOTTIME:
1994 case SOURCE_TIME_MONOTONIC:
1995 case SOURCE_TIME_REALTIME_ALARM:
1996 case SOURCE_TIME_BOOTTIME_ALARM: {
1997 struct clock_data *d;
1998
1999 s->enabled = m;
2000 d = event_get_clock_data(s->event, s->type);
2001 assert(d);
2002
2003 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2004 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2005 d->needs_rearm = true;
2006 break;
2007 }
2008
2009 case SOURCE_SIGNAL:
2010 s->enabled = m;
2011
2012 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2013 break;
2014
2015 case SOURCE_CHILD:
2016 s->enabled = m;
2017
2018 assert(s->event->n_enabled_child_sources > 0);
2019 s->event->n_enabled_child_sources--;
2020
2021 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2022 break;
2023
2024 case SOURCE_EXIT:
2025 s->enabled = m;
2026 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2027 break;
2028
2029 case SOURCE_DEFER:
2030 case SOURCE_POST:
2031 case SOURCE_INOTIFY:
2032 s->enabled = m;
2033 break;
2034
2035 default:
2036 assert_not_reached("Wut? I shouldn't exist.");
2037 }
2038
2039 } else {
2040
2041 /* Unset the pending flag when this event source is enabled */
2042 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2043 r = source_set_pending(s, false);
2044 if (r < 0)
2045 return r;
2046 }
2047
2048 switch (s->type) {
2049
2050 case SOURCE_IO:
2051 r = source_io_register(s, m, s->io.events);
2052 if (r < 0)
2053 return r;
2054
2055 s->enabled = m;
2056 break;
2057
2058 case SOURCE_TIME_REALTIME:
2059 case SOURCE_TIME_BOOTTIME:
2060 case SOURCE_TIME_MONOTONIC:
2061 case SOURCE_TIME_REALTIME_ALARM:
2062 case SOURCE_TIME_BOOTTIME_ALARM: {
2063 struct clock_data *d;
2064
2065 s->enabled = m;
2066 d = event_get_clock_data(s->event, s->type);
2067 assert(d);
2068
2069 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2070 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2071 d->needs_rearm = true;
2072 break;
2073 }
2074
2075 case SOURCE_SIGNAL:
2076
2077 s->enabled = m;
2078
2079 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2080 if (r < 0) {
2081 s->enabled = SD_EVENT_OFF;
2082 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2083 return r;
2084 }
2085
2086 break;
2087
2088 case SOURCE_CHILD:
2089
2090 if (s->enabled == SD_EVENT_OFF)
2091 s->event->n_enabled_child_sources++;
2092
2093 s->enabled = m;
2094
2095 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2096 if (r < 0) {
2097 s->enabled = SD_EVENT_OFF;
2098 s->event->n_enabled_child_sources--;
2099 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2100 return r;
2101 }
2102
2103 break;
2104
2105 case SOURCE_EXIT:
2106 s->enabled = m;
2107 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2108 break;
2109
2110 case SOURCE_DEFER:
2111 case SOURCE_POST:
2112 case SOURCE_INOTIFY:
2113 s->enabled = m;
2114 break;
2115
2116 default:
2117 assert_not_reached("Wut? I shouldn't exist.");
2118 }
2119 }
2120
2121 if (s->pending)
2122 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2123
2124 if (s->prepare)
2125 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2126
2127 return 0;
2128 }
2129
2130 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2131 assert_return(s, -EINVAL);
2132 assert_return(usec, -EINVAL);
2133 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2134 assert_return(!event_pid_changed(s->event), -ECHILD);
2135
2136 *usec = s->time.next;
2137 return 0;
2138 }
2139
2140 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2141 struct clock_data *d;
2142 int r;
2143
2144 assert_return(s, -EINVAL);
2145 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2146 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2147 assert_return(!event_pid_changed(s->event), -ECHILD);
2148
2149 r = source_set_pending(s, false);
2150 if (r < 0)
2151 return r;
2152
2153 s->time.next = usec;
2154
2155 d = event_get_clock_data(s->event, s->type);
2156 assert(d);
2157
2158 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2159 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2160 d->needs_rearm = true;
2161
2162 return 0;
2163 }
2164
2165 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2166 assert_return(s, -EINVAL);
2167 assert_return(usec, -EINVAL);
2168 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2169 assert_return(!event_pid_changed(s->event), -ECHILD);
2170
2171 *usec = s->time.accuracy;
2172 return 0;
2173 }
2174
2175 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2176 struct clock_data *d;
2177 int r;
2178
2179 assert_return(s, -EINVAL);
2180 assert_return(usec != (uint64_t) -1, -EINVAL);
2181 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2182 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2183 assert_return(!event_pid_changed(s->event), -ECHILD);
2184
2185 r = source_set_pending(s, false);
2186 if (r < 0)
2187 return r;
2188
2189 if (usec == 0)
2190 usec = DEFAULT_ACCURACY_USEC;
2191
2192 s->time.accuracy = usec;
2193
2194 d = event_get_clock_data(s->event, s->type);
2195 assert(d);
2196
2197 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2198 d->needs_rearm = true;
2199
2200 return 0;
2201 }
2202
2203 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2204 assert_return(s, -EINVAL);
2205 assert_return(clock, -EINVAL);
2206 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2207 assert_return(!event_pid_changed(s->event), -ECHILD);
2208
2209 *clock = event_source_type_to_clock(s->type);
2210 return 0;
2211 }
2212
2213 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2214 assert_return(s, -EINVAL);
2215 assert_return(pid, -EINVAL);
2216 assert_return(s->type == SOURCE_CHILD, -EDOM);
2217 assert_return(!event_pid_changed(s->event), -ECHILD);
2218
2219 *pid = s->child.pid;
2220 return 0;
2221 }
2222
2223 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2224 assert_return(s, -EINVAL);
2225 assert_return(mask, -EINVAL);
2226 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2227 assert_return(!event_pid_changed(s->event), -ECHILD);
2228
2229 *mask = s->inotify.mask;
2230 return 0;
2231 }
2232
2233 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2234 int r;
2235
2236 assert_return(s, -EINVAL);
2237 assert_return(s->type != SOURCE_EXIT, -EDOM);
2238 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2239 assert_return(!event_pid_changed(s->event), -ECHILD);
2240
2241 if (s->prepare == callback)
2242 return 0;
2243
2244 if (callback && s->prepare) {
2245 s->prepare = callback;
2246 return 0;
2247 }
2248
2249 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2250 if (r < 0)
2251 return r;
2252
2253 s->prepare = callback;
2254
2255 if (callback) {
2256 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2257 if (r < 0)
2258 return r;
2259 } else
2260 prioq_remove(s->event->prepare, s, &s->prepare_index);
2261
2262 return 0;
2263 }
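
/* Illustrative sketch (not part of the original file): a prepare callback runs
 * right before the loop polls, which makes it a convenient place to adjust a
 * source based on application state. "struct my_connection" is a hypothetical
 * application type; sd_event_source_set_io_events() is the real public API. */
struct my_connection {
        bool have_output;               /* hypothetical application state */
};

static int example_prepare(sd_event_source *s, void *userdata) {
        struct my_connection *c = userdata;

        /* Only poll for EPOLLOUT while there is actually something queued */
        return sd_event_source_set_io_events(s, c->have_output ? EPOLLIN|EPOLLOUT : EPOLLIN);
}

/* registered with: sd_event_source_set_prepare(s, example_prepare); */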
2264
2265 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2266 assert_return(s, NULL);
2267
2268 return s->userdata;
2269 }
2270
2271 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2272 void *ret;
2273
2274 assert_return(s, NULL);
2275
2276 ret = s->userdata;
2277 s->userdata = userdata;
2278
2279 return ret;
2280 }
2281
2282 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2283 usec_t c;
2284 assert(e);
2285 assert(a <= b);
2286
2287 if (a <= 0)
2288 return 0;
2289 if (a >= USEC_INFINITY)
2290 return USEC_INFINITY;
2291
2292 if (b <= a + 1)
2293 return a;
2294
2295 initialize_perturb(e);
2296
2297 /*
2298 Find a good time to wake up again between times a and b. We
2299 have two goals here:
2300
2301 a) We want to wake up as seldom as possible, hence prefer
2302 later times over earlier times.
2303
2304 b) But if we have to wake up, then let's make sure to
2305 dispatch as much as possible on the entire system.
2306
2307 We implement this by waking up everywhere at the same time
2308 within any given minute if we can, synchronised via the
2309 perturbation value determined from the boot ID. If we can't,
2310           then we try to find the same spot within every 10s, then every
2311           1s and then every 250ms step. Otherwise, we pick the last
2312           possible time to wake up.
2313 */
2314
2315 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2316 if (c >= b) {
2317 if (_unlikely_(c < USEC_PER_MINUTE))
2318 return b;
2319
2320 c -= USEC_PER_MINUTE;
2321 }
2322
2323 if (c >= a)
2324 return c;
2325
2326 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2327 if (c >= b) {
2328 if (_unlikely_(c < USEC_PER_SEC*10))
2329 return b;
2330
2331 c -= USEC_PER_SEC*10;
2332 }
2333
2334 if (c >= a)
2335 return c;
2336
2337 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2338 if (c >= b) {
2339 if (_unlikely_(c < USEC_PER_SEC))
2340 return b;
2341
2342 c -= USEC_PER_SEC;
2343 }
2344
2345 if (c >= a)
2346 return c;
2347
2348 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2349 if (c >= b) {
2350 if (_unlikely_(c < USEC_PER_MSEC*250))
2351 return b;
2352
2353 c -= USEC_PER_MSEC*250;
2354 }
2355
2356 if (c >= a)
2357 return c;
2358
2359 return b;
2360 }
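
/* Worked example (illustrative, made-up numbers): assume this boot's perturbation
 * is 7.5s and the admissible wake-up window is [a = 1000s, b = 1004s]:
 *
 *   minute step: c = 960s  + 7.5s = 967.5s   -> earlier than a, try a finer step
 *   10s step:    c = 1000s + 7.5s = 1007.5s  -> >= b, minus 10s = 997.5s, still < a
 *   1s step:     c = 1004s + 0.5s = 1004.5s  -> >= b, minus 1s  = 1003.5s, >= a, done
 *
 * Every loop on this boot that may wake anywhere between 1000s and 1004s thus
 * converges on the same instant, 1003.5s, close to the latest admissible time. */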
2361
2362 static int event_arm_timer(
2363 sd_event *e,
2364 struct clock_data *d) {
2365
2366 struct itimerspec its = {};
2367 sd_event_source *a, *b;
2368 usec_t t;
2369 int r;
2370
2371 assert(e);
2372 assert(d);
2373
2374 if (!d->needs_rearm)
2375 return 0;
2376 else
2377 d->needs_rearm = false;
2378
2379 a = prioq_peek(d->earliest);
2380 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2381
2382 if (d->fd < 0)
2383 return 0;
2384
2385 if (d->next == USEC_INFINITY)
2386 return 0;
2387
2388 /* disarm */
2389 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2390 if (r < 0)
2391                         return -errno;
2392
2393 d->next = USEC_INFINITY;
2394 return 0;
2395 }
2396
2397 b = prioq_peek(d->latest);
2398 assert_se(b && b->enabled != SD_EVENT_OFF);
2399
2400 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2401 if (d->next == t)
2402 return 0;
2403
2404 assert_se(d->fd >= 0);
2405
2406 if (t == 0) {
2407                 /* We don't want to disarm here, just set the timer to some time looooong ago. */
2408 its.it_value.tv_sec = 0;
2409 its.it_value.tv_nsec = 1;
2410 } else
2411 timespec_store(&its.it_value, t);
2412
2413 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2414 if (r < 0)
2415 return -errno;
2416
2417 d->next = t;
2418 return 0;
2419 }
2420
2421 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2422 assert(e);
2423 assert(s);
2424 assert(s->type == SOURCE_IO);
2425
2426 /* If the event source was already pending, we just OR in the
2427 * new revents, otherwise we reset the value. The ORing is
2428 * necessary to handle EPOLLONESHOT events properly where
2429 * readability might happen independently of writability, and
2430 * we need to keep track of both */
2431
2432 if (s->pending)
2433 s->io.revents |= revents;
2434 else
2435 s->io.revents = revents;
2436
2437 return source_set_pending(s, true);
2438 }
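
/* Illustrative sketch (not part of the original file): an I/O callback as a user
 * of this loop would write it. Because revents of a still-pending source are
 * OR-ed together as described above, each requested condition should be checked
 * independently. The handler name is made up. */
static int example_io_handler(sd_event_source *s, int fd, uint32_t revents, void *userdata) {

        if (revents & (EPOLLHUP|EPOLLERR))
                return sd_event_source_set_enabled(s, SD_EVENT_OFF);

        if (revents & EPOLLIN) {
                char buf[4096];

                (void) read(fd, buf, sizeof(buf));      /* consume the readable data */
        }

        if (revents & EPOLLOUT) {
                /* flush pending output here */
        }

        return 0;
}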
2439
2440 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2441 uint64_t x;
2442 ssize_t ss;
2443
2444 assert(e);
2445 assert(fd >= 0);
2446
2447 assert_return(events == EPOLLIN, -EIO);
2448
2449 ss = read(fd, &x, sizeof(x));
2450 if (ss < 0) {
2451 if (IN_SET(errno, EAGAIN, EINTR))
2452 return 0;
2453
2454 return -errno;
2455 }
2456
2457 if (_unlikely_(ss != sizeof(x)))
2458 return -EIO;
2459
2460 if (next)
2461 *next = USEC_INFINITY;
2462
2463 return 0;
2464 }
2465
2466 static int process_timer(
2467 sd_event *e,
2468 usec_t n,
2469 struct clock_data *d) {
2470
2471 sd_event_source *s;
2472 int r;
2473
2474 assert(e);
2475 assert(d);
2476
2477 for (;;) {
2478 s = prioq_peek(d->earliest);
2479 if (!s ||
2480 s->time.next > n ||
2481 s->enabled == SD_EVENT_OFF ||
2482 s->pending)
2483 break;
2484
2485 r = source_set_pending(s, true);
2486 if (r < 0)
2487 return r;
2488
2489 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2490 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2491 d->needs_rearm = true;
2492 }
2493
2494 return 0;
2495 }
2496
2497 static int process_child(sd_event *e) {
2498 sd_event_source *s;
2499 Iterator i;
2500 int r;
2501
2502 assert(e);
2503
2504 e->need_process_child = false;
2505
2506 /*
2507 So, this is ugly. We iteratively invoke waitid() with P_PID
2508 + WNOHANG for each PID we wait for, instead of using
2509 P_ALL. This is because we only want to get child
2510 information of very specific child processes, and not all
2511                 of them. We might not have processed the SIGCHLD event of a
2512                 previous invocation and we don't want to maintain an
2513                 unbounded *per-child* event queue, hence we really don't
2514                 want anything flushed out of the kernel's queue that we
2515                 don't care about. Since this is O(n), this means that if you
2516 have a lot of processes you probably want to handle SIGCHLD
2517 yourself.
2518
2519                 We do not reap the children here (hence WNOWAIT); reaping is
2520                 only done after the event source is dispatched, so that
2521                 the callback still sees the process as a zombie.
2522 */
2523
2524 HASHMAP_FOREACH(s, e->child_sources, i) {
2525 assert(s->type == SOURCE_CHILD);
2526
2527 if (s->pending)
2528 continue;
2529
2530 if (s->enabled == SD_EVENT_OFF)
2531 continue;
2532
2533 zero(s->child.siginfo);
2534 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2535 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2536 if (r < 0)
2537 return -errno;
2538
2539 if (s->child.siginfo.si_pid != 0) {
2540 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2541
2542 if (!zombie && (s->child.options & WEXITED)) {
2543 /* If the child isn't dead then let's
2544 * immediately remove the state change
2545 * from the queue, since there's no
2546 * benefit in leaving it queued */
2547
2548 assert(s->child.options & (WSTOPPED|WCONTINUED));
2549 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2550 }
2551
2552 r = source_set_pending(s, true);
2553 if (r < 0)
2554 return r;
2555 }
2556 }
2557
2558 return 0;
2559 }
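
/* Illustrative sketch (not part of the original file): as the comment above
 * suggests, with many children it can be cheaper to watch SIGCHLD directly and
 * reap everything in one pass instead of issuing one waitid() per child. The
 * handler name is made up; sd_event_add_signal() is the real public API and
 * requires SIGCHLD to be blocked in all threads first. */
static int example_sigchld_handler(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {

        for (;;) {
                siginfo_t info = {};

                /* Dequeue and reap any exited child; stop when none are left */
                if (waitid(P_ALL, 0, &info, WEXITED|WNOHANG) < 0 || info.si_pid == 0)
                        break;

                /* ... look up info.si_pid in application state and react ... */
        }

        return 0;
}

/* registered with: sd_event_add_signal(e, NULL, SIGCHLD, example_sigchld_handler, NULL); */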
2560
2561 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2562 bool read_one = false;
2563 int r;
2564
2565 assert(e);
2566 assert(d);
2567 assert_return(events == EPOLLIN, -EIO);
2568
2569 /* If there's a signal queued on this priority and SIGCHLD is
2570 on this priority too, then make sure to recheck the
2571            children we watch. This is because we only ever dequeue
2572            the first signal per priority; if we dequeue one while SIGCHLD
2573            is still enqueued behind it we wouldn't notice, even though
2574            there might be higher priority children we care about. Hence
2575            we need to check for them explicitly. */
2576
2577 if (sigismember(&d->sigset, SIGCHLD))
2578 e->need_process_child = true;
2579
2580 /* If there's already an event source pending for this
2581 * priority we don't read another */
2582 if (d->current)
2583 return 0;
2584
2585 for (;;) {
2586 struct signalfd_siginfo si;
2587 ssize_t n;
2588 sd_event_source *s = NULL;
2589
2590 n = read(d->fd, &si, sizeof(si));
2591 if (n < 0) {
2592 if (IN_SET(errno, EAGAIN, EINTR))
2593 return read_one;
2594
2595 return -errno;
2596 }
2597
2598 if (_unlikely_(n != sizeof(si)))
2599 return -EIO;
2600
2601 assert(SIGNAL_VALID(si.ssi_signo));
2602
2603 read_one = true;
2604
2605 if (e->signal_sources)
2606 s = e->signal_sources[si.ssi_signo];
2607 if (!s)
2608 continue;
2609 if (s->pending)
2610 continue;
2611
2612 s->signal.siginfo = si;
2613 d->current = s;
2614
2615 r = source_set_pending(s, true);
2616 if (r < 0)
2617 return r;
2618
2619 return 1;
2620 }
2621 }
2622
2623 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2624 ssize_t n;
2625
2626 assert(e);
2627 assert(d);
2628
2629 assert_return(revents == EPOLLIN, -EIO);
2630
2631 /* If there's already an event source pending for this priority, don't read another */
2632 if (d->n_pending > 0)
2633 return 0;
2634
2635 /* Is the read buffer non-empty? If so, let's not read more */
2636 if (d->buffer_filled > 0)
2637 return 0;
2638
2639 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2640 if (n < 0) {
2641 if (IN_SET(errno, EAGAIN, EINTR))
2642 return 0;
2643
2644 return -errno;
2645 }
2646
2647 assert(n > 0);
2648 d->buffer_filled = (size_t) n;
2649 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2650
2651 return 1;
2652 }
2653
2654 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2655 assert(e);
2656 assert(d);
2657 assert(sz <= d->buffer_filled);
2658
2659 if (sz == 0)
2660 return;
2661
2662         /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2663 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2664 d->buffer_filled -= sz;
2665
2666 if (d->buffer_filled == 0)
2667 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2668 }
2669
2670 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2671 int r;
2672
2673 assert(e);
2674 assert(d);
2675
2676 /* If there's already an event source pending for this priority, don't read another */
2677 if (d->n_pending > 0)
2678 return 0;
2679
2680 while (d->buffer_filled > 0) {
2681 size_t sz;
2682
2683 /* Let's validate that the event structures are complete */
2684 if (d->buffer_filled < offsetof(struct inotify_event, name))
2685 return -EIO;
2686
2687 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2688 if (d->buffer_filled < sz)
2689 return -EIO;
2690
2691 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2692 struct inode_data *inode_data;
2693 Iterator i;
2694
2695 /* The queue overran, let's pass this event to all event sources connected to this inotify
2696 * object */
2697
2698 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2699 sd_event_source *s;
2700
2701 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2702
2703 if (s->enabled == SD_EVENT_OFF)
2704 continue;
2705
2706 r = source_set_pending(s, true);
2707 if (r < 0)
2708 return r;
2709 }
2710 }
2711 } else {
2712 struct inode_data *inode_data;
2713 sd_event_source *s;
2714
2715 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2716 * our watch descriptor table. */
2717 if (d->buffer.ev.mask & IN_IGNORED) {
2718
2719 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2720 if (!inode_data) {
2721 event_inotify_data_drop(e, d, sz);
2722 continue;
2723 }
2724
2725 /* The watch descriptor was removed by the kernel, let's drop it here too */
2726 inode_data->wd = -1;
2727 } else {
2728 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2729 if (!inode_data) {
2730 event_inotify_data_drop(e, d, sz);
2731 continue;
2732 }
2733 }
2734
2735 /* Trigger all event sources that are interested in these events. Also trigger all event
2736 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2737 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2738
2739 if (s->enabled == SD_EVENT_OFF)
2740 continue;
2741
2742 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2743 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2744 continue;
2745
2746 r = source_set_pending(s, true);
2747 if (r < 0)
2748 return r;
2749 }
2750 }
2751
2752 /* Something pending now? If so, let's finish, otherwise let's read more. */
2753 if (d->n_pending > 0)
2754 return 1;
2755 }
2756
2757 return 0;
2758 }
2759
2760 static int process_inotify(sd_event *e) {
2761 struct inotify_data *d;
2762 int r, done = 0;
2763
2764 assert(e);
2765
2766 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2767 r = event_inotify_data_process(e, d);
2768 if (r < 0)
2769 return r;
2770 if (r > 0)
2771                         done++;
2772 }
2773
2774 return done;
2775 }
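
/* Illustrative sketch (not part of the original file): how a caller would
 * typically drive the inotify machinery above, via the public
 * sd_event_add_inotify() call. The watched path and the reaction are made up. */
static int example_inotify_handler(sd_event_source *s, const struct inotify_event *event, void *userdata) {

        if (event->mask & (IN_CREATE|IN_MOVED_TO))
                log_debug("New directory entry appeared: %s", event->len > 0 ? event->name : "(unnamed)");

        return 0;
}

/* registered with:
 *   sd_event_add_inotify(e, NULL, "/run/my-service", IN_CREATE|IN_MOVED_TO, example_inotify_handler, NULL); */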
2776
2777 static int source_dispatch(sd_event_source *s) {
2778 EventSourceType saved_type;
2779 int r = 0;
2780
2781 assert(s);
2782 assert(s->pending || s->type == SOURCE_EXIT);
2783
2784 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
2785 * the event. */
2786 saved_type = s->type;
2787
2788 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2789 r = source_set_pending(s, false);
2790 if (r < 0)
2791 return r;
2792 }
2793
2794 if (s->type != SOURCE_POST) {
2795 sd_event_source *z;
2796 Iterator i;
2797
2798 /* If we execute a non-post source, let's mark all
2799 * post sources as pending */
2800
2801 SET_FOREACH(z, s->event->post_sources, i) {
2802 if (z->enabled == SD_EVENT_OFF)
2803 continue;
2804
2805 r = source_set_pending(z, true);
2806 if (r < 0)
2807 return r;
2808 }
2809 }
2810
2811 if (s->enabled == SD_EVENT_ONESHOT) {
2812 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2813 if (r < 0)
2814 return r;
2815 }
2816
2817 s->dispatching = true;
2818
2819 switch (s->type) {
2820
2821 case SOURCE_IO:
2822 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2823 break;
2824
2825 case SOURCE_TIME_REALTIME:
2826 case SOURCE_TIME_BOOTTIME:
2827 case SOURCE_TIME_MONOTONIC:
2828 case SOURCE_TIME_REALTIME_ALARM:
2829 case SOURCE_TIME_BOOTTIME_ALARM:
2830 r = s->time.callback(s, s->time.next, s->userdata);
2831 break;
2832
2833 case SOURCE_SIGNAL:
2834 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2835 break;
2836
2837 case SOURCE_CHILD: {
2838 bool zombie;
2839
2840 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2841
2842 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2843
2844 /* Now, reap the PID for good. */
2845 if (zombie)
2846 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2847
2848 break;
2849 }
2850
2851 case SOURCE_DEFER:
2852 r = s->defer.callback(s, s->userdata);
2853 break;
2854
2855 case SOURCE_POST:
2856 r = s->post.callback(s, s->userdata);
2857 break;
2858
2859 case SOURCE_EXIT:
2860 r = s->exit.callback(s, s->userdata);
2861 break;
2862
2863 case SOURCE_INOTIFY: {
2864 struct sd_event *e = s->event;
2865 struct inotify_data *d;
2866 size_t sz;
2867
2868 assert(s->inotify.inode_data);
2869 assert_se(d = s->inotify.inode_data->inotify_data);
2870
2871 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
2872 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2873 assert(d->buffer_filled >= sz);
2874
2875 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
2876
2877 /* When no event is pending anymore on this inotify object, then let's drop the event from the
2878 * buffer. */
2879 if (d->n_pending == 0)
2880 event_inotify_data_drop(e, d, sz);
2881
2882 break;
2883 }
2884
2885 case SOURCE_WATCHDOG:
2886 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2887 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2888 assert_not_reached("Wut? I shouldn't exist.");
2889 }
2890
2891 s->dispatching = false;
2892
2893 if (r < 0)
2894 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2895 strna(s->description), event_source_type_to_string(saved_type));
2896
2897 if (s->n_ref == 0)
2898 source_free(s);
2899 else if (r < 0)
2900 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2901
2902 return 1;
2903 }
2904
2905 static int event_prepare(sd_event *e) {
2906 int r;
2907
2908 assert(e);
2909
2910 for (;;) {
2911 sd_event_source *s;
2912
2913 s = prioq_peek(e->prepare);
2914 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2915 break;
2916
2917 s->prepare_iteration = e->iteration;
2918 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2919 if (r < 0)
2920 return r;
2921
2922 assert(s->prepare);
2923
2924 s->dispatching = true;
2925 r = s->prepare(s, s->userdata);
2926 s->dispatching = false;
2927
2928 if (r < 0)
2929 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2930 strna(s->description), event_source_type_to_string(s->type));
2931
2932 if (s->n_ref == 0)
2933 source_free(s);
2934 else if (r < 0)
2935 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2936 }
2937
2938 return 0;
2939 }
2940
2941 static int dispatch_exit(sd_event *e) {
2942 sd_event_source *p;
2943 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2944 int r;
2945
2946 assert(e);
2947
2948 p = prioq_peek(e->exit);
2949 if (!p || p->enabled == SD_EVENT_OFF) {
2950 e->state = SD_EVENT_FINISHED;
2951 return 0;
2952 }
2953
2954 ref = sd_event_ref(e);
2955 e->iteration++;
2956 e->state = SD_EVENT_EXITING;
2957 r = source_dispatch(p);
2958 e->state = SD_EVENT_INITIAL;
2959 return r;
2960 }
2961
2962 static sd_event_source* event_next_pending(sd_event *e) {
2963 sd_event_source *p;
2964
2965 assert(e);
2966
2967 p = prioq_peek(e->pending);
2968 if (!p)
2969 return NULL;
2970
2971 if (p->enabled == SD_EVENT_OFF)
2972 return NULL;
2973
2974 return p;
2975 }
2976
2977 static int arm_watchdog(sd_event *e) {
2978 struct itimerspec its = {};
2979 usec_t t;
2980 int r;
2981
2982 assert(e);
2983 assert(e->watchdog_fd >= 0);
2984
2985 t = sleep_between(e,
2986 e->watchdog_last + (e->watchdog_period / 2),
2987 e->watchdog_last + (e->watchdog_period * 3 / 4));
2988
2989 timespec_store(&its.it_value, t);
2990
2991 /* Make sure we never set the watchdog to 0, which tells the
2992 * kernel to disable it. */
2993 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2994 its.it_value.tv_nsec = 1;
2995
2996 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2997 if (r < 0)
2998 return -errno;
2999
3000 return 0;
3001 }
3002
3003 static int process_watchdog(sd_event *e) {
3004 assert(e);
3005
3006 if (!e->watchdog)
3007 return 0;
3008
3009 /* Don't notify watchdog too often */
3010 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3011 return 0;
3012
3013 sd_notify(false, "WATCHDOG=1");
3014 e->watchdog_last = e->timestamp.monotonic;
3015
3016 return arm_watchdog(e);
3017 }
3018
3019 static void event_close_inode_data_fds(sd_event *e) {
3020 struct inode_data *d;
3021
3022 assert(e);
3023
3024 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3025          * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3026          * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3027          * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3028 * compromise. */
3029
3030 while ((d = e->inode_data_to_close)) {
3031 assert(d->fd >= 0);
3032 d->fd = safe_close(d->fd);
3033
3034 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3035 }
3036 }
3037
3038 _public_ int sd_event_prepare(sd_event *e) {
3039 int r;
3040
3041 assert_return(e, -EINVAL);
3042 assert_return(e = event_resolve(e), -ENOPKG);
3043 assert_return(!event_pid_changed(e), -ECHILD);
3044 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3045 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3046
3047 if (e->exit_requested)
3048 goto pending;
3049
3050 e->iteration++;
3051
3052 e->state = SD_EVENT_PREPARING;
3053 r = event_prepare(e);
3054 e->state = SD_EVENT_INITIAL;
3055 if (r < 0)
3056 return r;
3057
3058 r = event_arm_timer(e, &e->realtime);
3059 if (r < 0)
3060 return r;
3061
3062 r = event_arm_timer(e, &e->boottime);
3063 if (r < 0)
3064 return r;
3065
3066 r = event_arm_timer(e, &e->monotonic);
3067 if (r < 0)
3068 return r;
3069
3070 r = event_arm_timer(e, &e->realtime_alarm);
3071 if (r < 0)
3072 return r;
3073
3074 r = event_arm_timer(e, &e->boottime_alarm);
3075 if (r < 0)
3076 return r;
3077
3078 event_close_inode_data_fds(e);
3079
3080 if (event_next_pending(e) || e->need_process_child)
3081 goto pending;
3082
3083 e->state = SD_EVENT_ARMED;
3084
3085 return 0;
3086
3087 pending:
3088 e->state = SD_EVENT_ARMED;
3089 r = sd_event_wait(e, 0);
3090 if (r == 0)
3091 e->state = SD_EVENT_ARMED;
3092
3093 return r;
3094 }
3095
3096 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3097 struct epoll_event *ev_queue;
3098 unsigned ev_queue_max;
3099 int r, m, i;
3100
3101 assert_return(e, -EINVAL);
3102 assert_return(e = event_resolve(e), -ENOPKG);
3103 assert_return(!event_pid_changed(e), -ECHILD);
3104 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3105 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3106
3107 if (e->exit_requested) {
3108 e->state = SD_EVENT_PENDING;
3109 return 1;
3110 }
3111
3112 ev_queue_max = MAX(e->n_sources, 1u);
3113 ev_queue = newa(struct epoll_event, ev_queue_max);
3114
3115         /* If we still have inotify data buffered, then query the other fds, but don't block */
3116 if (e->inotify_data_buffered)
3117 timeout = 0;
3118
3119 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3120 timeout == (uint64_t) -1 ? -1 : (int) ((timeout + USEC_PER_MSEC - 1) / USEC_PER_MSEC));
3121 if (m < 0) {
3122 if (errno == EINTR) {
3123 e->state = SD_EVENT_PENDING;
3124 return 1;
3125 }
3126
3127 r = -errno;
3128 goto finish;
3129 }
3130
3131 triple_timestamp_get(&e->timestamp);
3132
3133 for (i = 0; i < m; i++) {
3134
3135 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3136 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3137 else {
3138 WakeupType *t = ev_queue[i].data.ptr;
3139
3140 switch (*t) {
3141
3142 case WAKEUP_EVENT_SOURCE:
3143 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3144 break;
3145
3146 case WAKEUP_CLOCK_DATA: {
3147 struct clock_data *d = ev_queue[i].data.ptr;
3148 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3149 break;
3150 }
3151
3152 case WAKEUP_SIGNAL_DATA:
3153 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3154 break;
3155
3156 case WAKEUP_INOTIFY_DATA:
3157 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3158 break;
3159
3160 default:
3161 assert_not_reached("Invalid wake-up pointer");
3162 }
3163 }
3164 if (r < 0)
3165 goto finish;
3166 }
3167
3168 r = process_watchdog(e);
3169 if (r < 0)
3170 goto finish;
3171
3172 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3173 if (r < 0)
3174 goto finish;
3175
3176 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3177 if (r < 0)
3178 goto finish;
3179
3180 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3181 if (r < 0)
3182 goto finish;
3183
3184 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3185 if (r < 0)
3186 goto finish;
3187
3188 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3189 if (r < 0)
3190 goto finish;
3191
3192 if (e->need_process_child) {
3193 r = process_child(e);
3194 if (r < 0)
3195 goto finish;
3196 }
3197
3198 r = process_inotify(e);
3199 if (r < 0)
3200 goto finish;
3201
3202 if (event_next_pending(e)) {
3203 e->state = SD_EVENT_PENDING;
3204
3205 return 1;
3206 }
3207
3208 r = 0;
3209
3210 finish:
3211 e->state = SD_EVENT_INITIAL;
3212
3213 return r;
3214 }
3215
3216 _public_ int sd_event_dispatch(sd_event *e) {
3217 sd_event_source *p;
3218 int r;
3219
3220 assert_return(e, -EINVAL);
3221 assert_return(e = event_resolve(e), -ENOPKG);
3222 assert_return(!event_pid_changed(e), -ECHILD);
3223 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3224 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3225
3226 if (e->exit_requested)
3227 return dispatch_exit(e);
3228
3229 p = event_next_pending(e);
3230 if (p) {
3231 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3232
3233 ref = sd_event_ref(e);
3234 e->state = SD_EVENT_RUNNING;
3235 r = source_dispatch(p);
3236 e->state = SD_EVENT_INITIAL;
3237 return r;
3238 }
3239
3240 e->state = SD_EVENT_INITIAL;
3241
3242 return 1;
3243 }
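
/* Illustrative sketch (not part of the original file): one iteration done by
 * hand with the prepare/wait/dispatch triple above, which is roughly what
 * sd_event_run() below wraps. Splitting it up like this is mainly useful when
 * integrating with a foreign event loop via sd_event_get_fd(). */
static int example_run_one_iteration(sd_event *e, uint64_t timeout) {
        int r;

        r = sd_event_prepare(e);        /* > 0 if something is already pending */
        if (r == 0)
                r = sd_event_wait(e, timeout);
        if (r > 0)
                r = sd_event_dispatch(e);

        return r;
}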
3244
3245 static void event_log_delays(sd_event *e) {
3246 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3247 unsigned i;
3248 int o;
3249
3250 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3251 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3252 e->delays[i] = 0;
3253 }
3254 log_debug("Event loop iterations: %.*s", o, b);
3255 }
3256
3257 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3258 int r;
3259
3260 assert_return(e, -EINVAL);
3261 assert_return(e = event_resolve(e), -ENOPKG);
3262 assert_return(!event_pid_changed(e), -ECHILD);
3263 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3264 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3265
3266 if (e->profile_delays && e->last_run) {
3267 usec_t this_run;
3268 unsigned l;
3269
3270 this_run = now(CLOCK_MONOTONIC);
3271
3272 l = u64log2(this_run - e->last_run);
3273 assert(l < sizeof(e->delays));
3274 e->delays[l]++;
3275
3276 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3277 event_log_delays(e);
3278 e->last_log = this_run;
3279 }
3280 }
3281
3282 r = sd_event_prepare(e);
3283 if (r == 0)
3284 /* There was nothing? Then wait... */
3285 r = sd_event_wait(e, timeout);
3286
3287 if (e->profile_delays)
3288 e->last_run = now(CLOCK_MONOTONIC);
3289
3290 if (r > 0) {
3291                 /* There's something now, so let's dispatch it */
3292 r = sd_event_dispatch(e);
3293 if (r < 0)
3294 return r;
3295
3296 return 1;
3297 }
3298
3299 return r;
3300 }
3301
3302 _public_ int sd_event_loop(sd_event *e) {
3303 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3304 int r;
3305
3306 assert_return(e, -EINVAL);
3307 assert_return(e = event_resolve(e), -ENOPKG);
3308 assert_return(!event_pid_changed(e), -ECHILD);
3309 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3310
3311 ref = sd_event_ref(e);
3312
3313 while (e->state != SD_EVENT_FINISHED) {
3314 r = sd_event_run(e, (uint64_t) -1);
3315 if (r < 0)
3316 return r;
3317 }
3318
3319 return e->exit_code;
3320 }
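
/* Illustrative sketch (not part of the original file): a minimal consumer of
 * this API. A one-shot timer fires after five seconds and asks the loop to
 * exit; example_main() is a hypothetical entry point. */
static int example_quit(sd_event_source *s, uint64_t usec, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

static int example_main(void) {
        _cleanup_(sd_event_unrefp) sd_event *e = NULL;
        uint64_t now_usec;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return r;

        (void) sd_event_now(e, CLOCK_MONOTONIC, &now_usec);

        r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 5 * USEC_PER_SEC, 0, example_quit, NULL);
        if (r < 0)
                return r;

        return sd_event_loop(e);
}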
3321
3322 _public_ int sd_event_get_fd(sd_event *e) {
3323
3324 assert_return(e, -EINVAL);
3325 assert_return(e = event_resolve(e), -ENOPKG);
3326 assert_return(!event_pid_changed(e), -ECHILD);
3327
3328 return e->epoll_fd;
3329 }
3330
3331 _public_ int sd_event_get_state(sd_event *e) {
3332 assert_return(e, -EINVAL);
3333 assert_return(e = event_resolve(e), -ENOPKG);
3334 assert_return(!event_pid_changed(e), -ECHILD);
3335
3336 return e->state;
3337 }
3338
3339 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3340 assert_return(e, -EINVAL);
3341 assert_return(e = event_resolve(e), -ENOPKG);
3342 assert_return(code, -EINVAL);
3343 assert_return(!event_pid_changed(e), -ECHILD);
3344
3345 if (!e->exit_requested)
3346 return -ENODATA;
3347
3348 *code = e->exit_code;
3349 return 0;
3350 }
3351
3352 _public_ int sd_event_exit(sd_event *e, int code) {
3353 assert_return(e, -EINVAL);
3354 assert_return(e = event_resolve(e), -ENOPKG);
3355 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3356 assert_return(!event_pid_changed(e), -ECHILD);
3357
3358 e->exit_requested = true;
3359 e->exit_code = code;
3360
3361 return 0;
3362 }
3363
3364 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3365 assert_return(e, -EINVAL);
3366 assert_return(e = event_resolve(e), -ENOPKG);
3367 assert_return(usec, -EINVAL);
3368 assert_return(!event_pid_changed(e), -ECHILD);
3369
3370 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3371 return -EOPNOTSUPP;
3372
3373         /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3374          * for a reason: there are systems where CLOCK_BOOTTIME is supported but CLOCK_BOOTTIME_ALARM is not, yet for
3375          * the purpose of getting the time this doesn't matter. */
3376 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3377 return -EOPNOTSUPP;
3378
3379 if (!triple_timestamp_is_set(&e->timestamp)) {
3380 /* Implicitly fall back to now() if we never ran
3381 * before and thus have no cached time. */
3382 *usec = now(clock);
3383 return 1;
3384 }
3385
3386 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3387 return 0;
3388 }
3389
3390 _public_ int sd_event_default(sd_event **ret) {
3391 sd_event *e = NULL;
3392 int r;
3393
3394 if (!ret)
3395 return !!default_event;
3396
3397 if (default_event) {
3398 *ret = sd_event_ref(default_event);
3399 return 0;
3400 }
3401
3402 r = sd_event_new(&e);
3403 if (r < 0)
3404 return r;
3405
3406 e->default_event_ptr = &default_event;
3407 e->tid = gettid();
3408 default_event = e;
3409
3410 *ret = e;
3411 return 1;
3412 }
3413
3414 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3415 assert_return(e, -EINVAL);
3416 assert_return(e = event_resolve(e), -ENOPKG);
3417 assert_return(tid, -EINVAL);
3418 assert_return(!event_pid_changed(e), -ECHILD);
3419
3420 if (e->tid != 0) {
3421 *tid = e->tid;
3422 return 0;
3423 }
3424
3425 return -ENXIO;
3426 }
3427
3428 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3429 int r;
3430
3431 assert_return(e, -EINVAL);
3432 assert_return(e = event_resolve(e), -ENOPKG);
3433 assert_return(!event_pid_changed(e), -ECHILD);
3434
3435 if (e->watchdog == !!b)
3436 return e->watchdog;
3437
3438 if (b) {
3439 struct epoll_event ev;
3440
3441 r = sd_watchdog_enabled(false, &e->watchdog_period);
3442 if (r <= 0)
3443 return r;
3444
3445 /* Issue first ping immediately */
3446 sd_notify(false, "WATCHDOG=1");
3447 e->watchdog_last = now(CLOCK_MONOTONIC);
3448
3449 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3450 if (e->watchdog_fd < 0)
3451 return -errno;
3452
3453 r = arm_watchdog(e);
3454 if (r < 0)
3455 goto fail;
3456
3457 ev = (struct epoll_event) {
3458 .events = EPOLLIN,
3459 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3460 };
3461
3462 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3463 if (r < 0) {
3464 r = -errno;
3465 goto fail;
3466 }
3467
3468 } else {
3469 if (e->watchdog_fd >= 0) {
3470 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3471 e->watchdog_fd = safe_close(e->watchdog_fd);
3472 }
3473 }
3474
3475 e->watchdog = !!b;
3476 return e->watchdog;
3477
3478 fail:
3479 e->watchdog_fd = safe_close(e->watchdog_fd);
3480 return r;
3481 }
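
/* Illustrative sketch (not part of the original file): a service started with
 * WatchdogSec= in its unit file only needs this one call; while the loop runs
 * it then sends the WATCHDOG=1 keep-alive pings by itself. The helper name is
 * made up. */
static int example_enable_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                return r;
        if (r == 0)
                log_debug("No watchdog requested by the service manager, continuing without.");

        return 0;
}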
3482
3483 _public_ int sd_event_get_watchdog(sd_event *e) {
3484 assert_return(e, -EINVAL);
3485 assert_return(e = event_resolve(e), -ENOPKG);
3486 assert_return(!event_pid_changed(e), -ECHILD);
3487
3488 return e->watchdog;
3489 }
3490
3491 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3492 assert_return(e, -EINVAL);
3493 assert_return(e = event_resolve(e), -ENOPKG);
3494 assert_return(!event_pid_changed(e), -ECHILD);
3495
3496 *ret = e->iteration;
3497 return 0;
3498 }
3499
3500 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3501 assert_return(s, -EINVAL);
3502
3503 s->destroy_callback = callback;
3504 return 0;
3505 }
3506
3507 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3508 assert_return(s, -EINVAL);
3509
3510 if (ret)
3511 *ret = s->destroy_callback;
3512
3513 return !!s->destroy_callback;
3514 }
3515
3516 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3517 assert_return(s, -EINVAL);
3518
3519 return s->floating;
3520 }
3521
3522 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3523 assert_return(s, -EINVAL);
3524
3525 if (s->floating == !!b)
3526 return 0;
3527
3528 if (!s->event) /* Already disconnected */
3529 return -ESTALE;
3530
3531 s->floating = b;
3532
3533 if (b) {
3534 sd_event_source_ref(s);
3535 sd_event_unref(s->event);
3536 } else {
3537 sd_event_ref(s->event);
3538 sd_event_source_unref(s);
3539 }
3540
3541 return 1;
3542 }
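
/* Illustrative note (not part of the original file): a "floating" source is
 * pinned by its event loop rather than by the caller, which allows
 * fire-and-forget registrations to drop their reference immediately:
 *
 *   sd_event_source *s;
 *   sd_event_add_defer(e, &s, one_shot_cleanup, NULL);   // hypothetical callback
 *   sd_event_source_set_floating(s, true);
 *   sd_event_source_unref(s);   // the loop now keeps the source alive
 */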