src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "event-source.h"
13 #include "fd-util.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "list.h"
17 #include "macro.h"
18 #include "memory-util.h"
19 #include "missing.h"
20 #include "prioq.h"
21 #include "process-util.h"
22 #include "set.h"
23 #include "signal-util.h"
24 #include "string-table.h"
25 #include "string-util.h"
26 #include "time-util.h"
27
28 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
29
30 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
31 [SOURCE_IO] = "io",
32 [SOURCE_TIME_REALTIME] = "realtime",
33 [SOURCE_TIME_BOOTTIME] = "boottime",
34 [SOURCE_TIME_MONOTONIC] = "monotonic",
35 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
36 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
37 [SOURCE_SIGNAL] = "signal",
38 [SOURCE_CHILD] = "child",
39 [SOURCE_DEFER] = "defer",
40 [SOURCE_POST] = "post",
41 [SOURCE_EXIT] = "exit",
42 [SOURCE_WATCHDOG] = "watchdog",
43 [SOURCE_INOTIFY] = "inotify",
44 };
45
46 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
47
48 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
49
50 struct sd_event {
51 unsigned n_ref;
52
53 int epoll_fd;
54 int watchdog_fd;
55
56 Prioq *pending;
57 Prioq *prepare;
58
59 /* timerfd_create() only supports these five clocks so far. We
60 * can add support for more clocks when the kernel learns to
61 * deal with them, too. */
62 struct clock_data realtime;
63 struct clock_data boottime;
64 struct clock_data monotonic;
65 struct clock_data realtime_alarm;
66 struct clock_data boottime_alarm;
67
68 usec_t perturb;
69
70 sd_event_source **signal_sources; /* indexed by signal number */
71 Hashmap *signal_data; /* indexed by priority */
72
73 Hashmap *child_sources;
74 unsigned n_enabled_child_sources;
75
76 Set *post_sources;
77
78 Prioq *exit;
79
80 Hashmap *inotify_data; /* indexed by priority */
81
82 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
83 LIST_HEAD(struct inode_data, inode_data_to_close);
84
85 /* A list of inotify objects that already have events buffered which aren't processed yet */
86 LIST_HEAD(struct inotify_data, inotify_data_buffered);
87
88 pid_t original_pid;
89
90 uint64_t iteration;
91 triple_timestamp timestamp;
92 int state;
93
94 bool exit_requested:1;
95 bool need_process_child:1;
96 bool watchdog:1;
97 bool profile_delays:1;
98
99 int exit_code;
100
101 pid_t tid;
102 sd_event **default_event_ptr;
103
104 usec_t watchdog_last, watchdog_period;
105
106 unsigned n_sources;
107
108 LIST_HEAD(sd_event_source, sources);
109
110 usec_t last_run, last_log;
111 unsigned delays[sizeof(usec_t) * 8];
112 };
113
114 static thread_local sd_event *default_event = NULL;
115
116 static void source_disconnect(sd_event_source *s);
117 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
118
119 static sd_event *event_resolve(sd_event *e) {
120 return e == SD_EVENT_DEFAULT ? default_event : e;
121 }
122
123 static int pending_prioq_compare(const void *a, const void *b) {
124 const sd_event_source *x = a, *y = b;
125 int r;
126
127 assert(x->pending);
128 assert(y->pending);
129
130 /* Enabled ones first */
131 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
132 return -1;
133 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
134 return 1;
135
136 /* Lower priority values first */
137 r = CMP(x->priority, y->priority);
138 if (r != 0)
139 return r;
140
141 /* Older entries first */
142 return CMP(x->pending_iteration, y->pending_iteration);
143 }
144
145 static int prepare_prioq_compare(const void *a, const void *b) {
146 const sd_event_source *x = a, *y = b;
147 int r;
148
149 assert(x->prepare);
150 assert(y->prepare);
151
152 /* Enabled ones first */
153 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
154 return -1;
155 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
156 return 1;
157
158 /* Move most recently prepared ones last, so that we can stop
159 * preparing as soon as we hit one that has already been
160 * prepared in the current iteration */
161 r = CMP(x->prepare_iteration, y->prepare_iteration);
162 if (r != 0)
163 return r;
164
165 /* Lower priority values first */
166 return CMP(x->priority, y->priority);
167 }
168
169 static int earliest_time_prioq_compare(const void *a, const void *b) {
170 const sd_event_source *x = a, *y = b;
171
172 assert(EVENT_SOURCE_IS_TIME(x->type));
173 assert(x->type == y->type);
174
175 /* Enabled ones first */
176 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
177 return -1;
178 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
179 return 1;
180
181 /* Move the pending ones to the end */
182 if (!x->pending && y->pending)
183 return -1;
184 if (x->pending && !y->pending)
185 return 1;
186
187 /* Order by time */
188 return CMP(x->time.next, y->time.next);
189 }
190
191 static usec_t time_event_source_latest(const sd_event_source *s) {
192 return usec_add(s->time.next, s->time.accuracy);
193 }
194
195 static int latest_time_prioq_compare(const void *a, const void *b) {
196 const sd_event_source *x = a, *y = b;
197
198 assert(EVENT_SOURCE_IS_TIME(x->type));
199 assert(x->type == y->type);
200
201 /* Enabled ones first */
202 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203 return -1;
204 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205 return 1;
206
207 /* Move the pending ones to the end */
208 if (!x->pending && y->pending)
209 return -1;
210 if (x->pending && !y->pending)
211 return 1;
212
213 /* Order by time */
214 return CMP(time_event_source_latest(x), time_event_source_latest(y));
215 }
216
217 static int exit_prioq_compare(const void *a, const void *b) {
218 const sd_event_source *x = a, *y = b;
219
220 assert(x->type == SOURCE_EXIT);
221 assert(y->type == SOURCE_EXIT);
222
223 /* Enabled ones first */
224 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
225 return -1;
226 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 return 1;
228
229 /* Lower priority values first */
230 return CMP(x->priority, y->priority);
231 }
232
233 static void free_clock_data(struct clock_data *d) {
234 assert(d);
235 assert(d->wakeup == WAKEUP_CLOCK_DATA);
236
237 safe_close(d->fd);
238 prioq_free(d->earliest);
239 prioq_free(d->latest);
240 }
241
242 static sd_event *event_free(sd_event *e) {
243 sd_event_source *s;
244
245 assert(e);
246
247 while ((s = e->sources)) {
248 assert(s->floating);
249 source_disconnect(s);
250 sd_event_source_unref(s);
251 }
252
253 assert(e->n_sources == 0);
254
255 if (e->default_event_ptr)
256 *(e->default_event_ptr) = NULL;
257
258 safe_close(e->epoll_fd);
259 safe_close(e->watchdog_fd);
260
261 free_clock_data(&e->realtime);
262 free_clock_data(&e->boottime);
263 free_clock_data(&e->monotonic);
264 free_clock_data(&e->realtime_alarm);
265 free_clock_data(&e->boottime_alarm);
266
267 prioq_free(e->pending);
268 prioq_free(e->prepare);
269 prioq_free(e->exit);
270
271 free(e->signal_sources);
272 hashmap_free(e->signal_data);
273
274 hashmap_free(e->inotify_data);
275
276 hashmap_free(e->child_sources);
277 set_free(e->post_sources);
278
279 return mfree(e);
280 }
281
282 _public_ int sd_event_new(sd_event** ret) {
283 sd_event *e;
284 int r;
285
286 assert_return(ret, -EINVAL);
287
288 e = new(sd_event, 1);
289 if (!e)
290 return -ENOMEM;
291
292 *e = (sd_event) {
293 .n_ref = 1,
294 .epoll_fd = -1,
295 .watchdog_fd = -1,
296 .realtime.wakeup = WAKEUP_CLOCK_DATA,
297 .realtime.fd = -1,
298 .realtime.next = USEC_INFINITY,
299 .boottime.wakeup = WAKEUP_CLOCK_DATA,
300 .boottime.fd = -1,
301 .boottime.next = USEC_INFINITY,
302 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
303 .monotonic.fd = -1,
304 .monotonic.next = USEC_INFINITY,
305 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
306 .realtime_alarm.fd = -1,
307 .realtime_alarm.next = USEC_INFINITY,
308 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
309 .boottime_alarm.fd = -1,
310 .boottime_alarm.next = USEC_INFINITY,
311 .perturb = USEC_INFINITY,
312 .original_pid = getpid_cached(),
313 };
314
315 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
316 if (r < 0)
317 goto fail;
318
319 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
320 if (e->epoll_fd < 0) {
321 r = -errno;
322 goto fail;
323 }
324
325 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
326
327 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
328 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
329 e->profile_delays = true;
330 }
331
332 *ret = e;
333 return 0;
334
335 fail:
336 event_free(e);
337 return r;
338 }
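/* Usage sketch (illustrative only, not part of the library): allocating a private event loop,
 * running it and releasing it again. sd_event_loop() and sd_event_unref() are public API;
 * sd_event_loop() is defined further down in this file.
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_loop(e);    // dispatches sources until sd_event_exit() is called
 *     sd_event_unref(e);
 *     return r;
 */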
339
340 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
341
342 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
343 if (s)
344 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
345 return sd_event_source_unref(s);
346 }
347
348 static bool event_pid_changed(sd_event *e) {
349 assert(e);
350
351 /* We don't support people creating an event loop and keeping
352 * it around over a fork(). Let's complain. */
353
354 return e->original_pid != getpid_cached();
355 }
356
357 static void source_io_unregister(sd_event_source *s) {
358 int r;
359
360 assert(s);
361 assert(s->type == SOURCE_IO);
362
363 if (event_pid_changed(s->event))
364 return;
365
366 if (!s->io.registered)
367 return;
368
369 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
370 if (r < 0)
371 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
372 strna(s->description), event_source_type_to_string(s->type));
373
374 s->io.registered = false;
375 }
376
377 static int source_io_register(
378 sd_event_source *s,
379 int enabled,
380 uint32_t events) {
381
382 struct epoll_event ev;
383 int r;
384
385 assert(s);
386 assert(s->type == SOURCE_IO);
387 assert(enabled != SD_EVENT_OFF);
388
389 ev = (struct epoll_event) {
390 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
391 .data.ptr = s,
392 };
393
394 if (s->io.registered)
395 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
396 else
397 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
398 if (r < 0)
399 return -errno;
400
401 s->io.registered = true;
402
403 return 0;
404 }
405
406 static clockid_t event_source_type_to_clock(EventSourceType t) {
407
408 switch (t) {
409
410 case SOURCE_TIME_REALTIME:
411 return CLOCK_REALTIME;
412
413 case SOURCE_TIME_BOOTTIME:
414 return CLOCK_BOOTTIME;
415
416 case SOURCE_TIME_MONOTONIC:
417 return CLOCK_MONOTONIC;
418
419 case SOURCE_TIME_REALTIME_ALARM:
420 return CLOCK_REALTIME_ALARM;
421
422 case SOURCE_TIME_BOOTTIME_ALARM:
423 return CLOCK_BOOTTIME_ALARM;
424
425 default:
426 return (clockid_t) -1;
427 }
428 }
429
430 static EventSourceType clock_to_event_source_type(clockid_t clock) {
431
432 switch (clock) {
433
434 case CLOCK_REALTIME:
435 return SOURCE_TIME_REALTIME;
436
437 case CLOCK_BOOTTIME:
438 return SOURCE_TIME_BOOTTIME;
439
440 case CLOCK_MONOTONIC:
441 return SOURCE_TIME_MONOTONIC;
442
443 case CLOCK_REALTIME_ALARM:
444 return SOURCE_TIME_REALTIME_ALARM;
445
446 case CLOCK_BOOTTIME_ALARM:
447 return SOURCE_TIME_BOOTTIME_ALARM;
448
449 default:
450 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
451 }
452 }
453
454 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
455 assert(e);
456
457 switch (t) {
458
459 case SOURCE_TIME_REALTIME:
460 return &e->realtime;
461
462 case SOURCE_TIME_BOOTTIME:
463 return &e->boottime;
464
465 case SOURCE_TIME_MONOTONIC:
466 return &e->monotonic;
467
468 case SOURCE_TIME_REALTIME_ALARM:
469 return &e->realtime_alarm;
470
471 case SOURCE_TIME_BOOTTIME_ALARM:
472 return &e->boottime_alarm;
473
474 default:
475 return NULL;
476 }
477 }
478
479 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
480 assert(e);
481
482 if (!d)
483 return;
484
485 hashmap_remove(e->signal_data, &d->priority);
486 safe_close(d->fd);
487 free(d);
488 }
489
490 static int event_make_signal_data(
491 sd_event *e,
492 int sig,
493 struct signal_data **ret) {
494
495 struct epoll_event ev;
496 struct signal_data *d;
497 bool added = false;
498 sigset_t ss_copy;
499 int64_t priority;
500 int r;
501
502 assert(e);
503
504 if (event_pid_changed(e))
505 return -ECHILD;
506
507 if (e->signal_sources && e->signal_sources[sig])
508 priority = e->signal_sources[sig]->priority;
509 else
510 priority = SD_EVENT_PRIORITY_NORMAL;
511
512 d = hashmap_get(e->signal_data, &priority);
513 if (d) {
514 if (sigismember(&d->sigset, sig) > 0) {
515 if (ret)
516 *ret = d;
517 return 0;
518 }
519 } else {
520 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
521 if (r < 0)
522 return r;
523
524 d = new(struct signal_data, 1);
525 if (!d)
526 return -ENOMEM;
527
528 *d = (struct signal_data) {
529 .wakeup = WAKEUP_SIGNAL_DATA,
530 .fd = -1,
531 .priority = priority,
532 };
533
534 r = hashmap_put(e->signal_data, &d->priority, d);
535 if (r < 0) {
536 free(d);
537 return r;
538 }
539
540 added = true;
541 }
542
543 ss_copy = d->sigset;
544 assert_se(sigaddset(&ss_copy, sig) >= 0);
545
546 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
547 if (r < 0) {
548 r = -errno;
549 goto fail;
550 }
551
552 d->sigset = ss_copy;
553
554 if (d->fd >= 0) {
555 if (ret)
556 *ret = d;
557 return 0;
558 }
559
560 d->fd = fd_move_above_stdio(r);
561
562 ev = (struct epoll_event) {
563 .events = EPOLLIN,
564 .data.ptr = d,
565 };
566
567 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
568 if (r < 0) {
569 r = -errno;
570 goto fail;
571 }
572
573 if (ret)
574 *ret = d;
575
576 return 0;
577
578 fail:
579 if (added)
580 event_free_signal_data(e, d);
581
582 return r;
583 }
584
585 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
586 assert(e);
587 assert(d);
588
589 /* Turns off the specified signal in the signal data
590 * object. If the signal mask of the object becomes empty
591 * that way, the object is removed. */
592
593 if (sigismember(&d->sigset, sig) == 0)
594 return;
595
596 assert_se(sigdelset(&d->sigset, sig) >= 0);
597
598 if (sigisemptyset(&d->sigset)) {
599 /* If the mask is now all-zero we can get rid of the structure */
600 event_free_signal_data(e, d);
601 return;
602 }
603
604 assert(d->fd >= 0);
605
606 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
607 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
608 }
609
610 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
611 struct signal_data *d;
612 static const int64_t zero_priority = 0;
613
614 assert(e);
615
616 /* Rechecks if the specified signal is still something we are
617 * interested in. If not, we'll unmask it, and possibly drop
618 * the signalfd for it. */
619
620 if (sig == SIGCHLD &&
621 e->n_enabled_child_sources > 0)
622 return;
623
624 if (e->signal_sources &&
625 e->signal_sources[sig] &&
626 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
627 return;
628
629 /*
630 * The specified signal might be enabled in three different queues:
631 *
632 * 1) the one that belongs to the priority passed (if it is non-NULL)
633 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
634 * 3) the 0 priority (to cover the SIGCHLD case)
635 *
636 * Hence, let's remove it from all three here.
637 */
638
639 if (priority) {
640 d = hashmap_get(e->signal_data, priority);
641 if (d)
642 event_unmask_signal_data(e, d, sig);
643 }
644
645 if (e->signal_sources && e->signal_sources[sig]) {
646 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
647 if (d)
648 event_unmask_signal_data(e, d, sig);
649 }
650
651 d = hashmap_get(e->signal_data, &zero_priority);
652 if (d)
653 event_unmask_signal_data(e, d, sig);
654 }
655
656 static void source_disconnect(sd_event_source *s) {
657 sd_event *event;
658
659 assert(s);
660
661 if (!s->event)
662 return;
663
664 assert(s->event->n_sources > 0);
665
666 switch (s->type) {
667
668 case SOURCE_IO:
669 if (s->io.fd >= 0)
670 source_io_unregister(s);
671
672 break;
673
674 case SOURCE_TIME_REALTIME:
675 case SOURCE_TIME_BOOTTIME:
676 case SOURCE_TIME_MONOTONIC:
677 case SOURCE_TIME_REALTIME_ALARM:
678 case SOURCE_TIME_BOOTTIME_ALARM: {
679 struct clock_data *d;
680
681 d = event_get_clock_data(s->event, s->type);
682 assert(d);
683
684 prioq_remove(d->earliest, s, &s->time.earliest_index);
685 prioq_remove(d->latest, s, &s->time.latest_index);
686 d->needs_rearm = true;
687 break;
688 }
689
690 case SOURCE_SIGNAL:
691 if (s->signal.sig > 0) {
692
693 if (s->event->signal_sources)
694 s->event->signal_sources[s->signal.sig] = NULL;
695
696 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
697 }
698
699 break;
700
701 case SOURCE_CHILD:
702 if (s->child.pid > 0) {
703 if (s->enabled != SD_EVENT_OFF) {
704 assert(s->event->n_enabled_child_sources > 0);
705 s->event->n_enabled_child_sources--;
706 }
707
708 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
709 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
710 }
711
712 break;
713
714 case SOURCE_DEFER:
715 /* nothing */
716 break;
717
718 case SOURCE_POST:
719 set_remove(s->event->post_sources, s);
720 break;
721
722 case SOURCE_EXIT:
723 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
724 break;
725
726 case SOURCE_INOTIFY: {
727 struct inode_data *inode_data;
728
729 inode_data = s->inotify.inode_data;
730 if (inode_data) {
731 struct inotify_data *inotify_data;
732 assert_se(inotify_data = inode_data->inotify_data);
733
734 /* Detach this event source from the inode object */
735 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
736 s->inotify.inode_data = NULL;
737
738 if (s->pending) {
739 assert(inotify_data->n_pending > 0);
740 inotify_data->n_pending--;
741 }
742
743 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
744 * continues to be watched. That's because inotify doesn't really have an API for that: we
745 * can only change watch masks with access to the original inode either by fd or by path. But
746 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
747 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
748 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
749 * there), but given the need for open_by_handle_at() which is privileged and not universally
750 * available this would be quite an incomplete solution. Hence we go the other way, leave the
751 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
752 * anymore after reception. Yes, this sucks, but … Linux … */
753
754 /* Maybe release the inode data (and its inotify) */
755 event_gc_inode_data(s->event, inode_data);
756 }
757
758 break;
759 }
760
761 default:
762 assert_not_reached("Wut? I shouldn't exist.");
763 }
764
765 if (s->pending)
766 prioq_remove(s->event->pending, s, &s->pending_index);
767
768 if (s->prepare)
769 prioq_remove(s->event->prepare, s, &s->prepare_index);
770
771 event = s->event;
772
773 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
774 s->event = NULL;
775 LIST_REMOVE(sources, event->sources, s);
776 event->n_sources--;
777
778 if (!s->floating)
779 sd_event_unref(event);
780 }
781
782 static void source_free(sd_event_source *s) {
783 assert(s);
784
785 source_disconnect(s);
786
787 if (s->type == SOURCE_IO && s->io.owned)
788 s->io.fd = safe_close(s->io.fd);
789
790 if (s->destroy_callback)
791 s->destroy_callback(s->userdata);
792
793 free(s->description);
794 free(s);
795 }
796 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
797
798 static int source_set_pending(sd_event_source *s, bool b) {
799 int r;
800
801 assert(s);
802 assert(s->type != SOURCE_EXIT);
803
804 if (s->pending == b)
805 return 0;
806
807 s->pending = b;
808
809 if (b) {
810 s->pending_iteration = s->event->iteration;
811
812 r = prioq_put(s->event->pending, s, &s->pending_index);
813 if (r < 0) {
814 s->pending = false;
815 return r;
816 }
817 } else
818 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
819
820 if (EVENT_SOURCE_IS_TIME(s->type)) {
821 struct clock_data *d;
822
823 d = event_get_clock_data(s->event, s->type);
824 assert(d);
825
826 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
827 prioq_reshuffle(d->latest, s, &s->time.latest_index);
828 d->needs_rearm = true;
829 }
830
831 if (s->type == SOURCE_SIGNAL && !b) {
832 struct signal_data *d;
833
834 d = hashmap_get(s->event->signal_data, &s->priority);
835 if (d && d->current == s)
836 d->current = NULL;
837 }
838
839 if (s->type == SOURCE_INOTIFY) {
840
841 assert(s->inotify.inode_data);
842 assert(s->inotify.inode_data->inotify_data);
843
844 if (b)
845 s->inotify.inode_data->inotify_data->n_pending++;
846 else {
847 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
848 s->inotify.inode_data->inotify_data->n_pending--;
849 }
850 }
851
852 return 0;
853 }
854
855 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
856 sd_event_source *s;
857
858 assert(e);
859
860 s = new(sd_event_source, 1);
861 if (!s)
862 return NULL;
863
864 *s = (struct sd_event_source) {
865 .n_ref = 1,
866 .event = e,
867 .floating = floating,
868 .type = type,
869 .pending_index = PRIOQ_IDX_NULL,
870 .prepare_index = PRIOQ_IDX_NULL,
871 };
872
873 if (!floating)
874 sd_event_ref(e);
875
876 LIST_PREPEND(sources, e->sources, s);
877 e->n_sources++;
878
879 return s;
880 }
881
882 _public_ int sd_event_add_io(
883 sd_event *e,
884 sd_event_source **ret,
885 int fd,
886 uint32_t events,
887 sd_event_io_handler_t callback,
888 void *userdata) {
889
890 _cleanup_(source_freep) sd_event_source *s = NULL;
891 int r;
892
893 assert_return(e, -EINVAL);
894 assert_return(e = event_resolve(e), -ENOPKG);
895 assert_return(fd >= 0, -EBADF);
896 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
897 assert_return(callback, -EINVAL);
898 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
899 assert_return(!event_pid_changed(e), -ECHILD);
900
901 s = source_new(e, !ret, SOURCE_IO);
902 if (!s)
903 return -ENOMEM;
904
905 s->wakeup = WAKEUP_EVENT_SOURCE;
906 s->io.fd = fd;
907 s->io.events = events;
908 s->io.callback = callback;
909 s->userdata = userdata;
910 s->enabled = SD_EVENT_ON;
911
912 r = source_io_register(s, s->enabled, events);
913 if (r < 0)
914 return r;
915
916 if (ret)
917 *ret = s;
918 TAKE_PTR(s);
919
920 return 0;
921 }
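/* Usage sketch (illustrative only): watching an already connected, non-blocking socket for
 * input. The names "e", "fd" and "on_io" are assumptions made for the example. Passing NULL
 * as the "ret" parameter makes the source floating, i.e. owned by the event loop itself
 * (see source_new() above).
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *
 *             if (revents & EPOLLIN)
 *                     (void) read(fd, buf, sizeof(buf));   // drain the socket
 *             return 0;
 *     }
 *
 *     r = sd_event_add_io(e, NULL, fd, EPOLLIN, on_io, NULL);
 */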
922
923 static void initialize_perturb(sd_event *e) {
924 sd_id128_t bootid = {};
925
926 /* When we sleep for longer, we try to realign the wakeup to
927 the same time within each minute/second/250ms, so that
928 events all across the system can be coalesced into a single
929 CPU wakeup. However, let's take some system-specific
930 randomness for this value, so that in a network of systems
931 with synced clocks timer events are distributed a
932 bit. Here, we calculate a perturbation usec offset from the
933 boot ID. */
934
935 if (_likely_(e->perturb != USEC_INFINITY))
936 return;
937
938 if (sd_id128_get_boot(&bootid) >= 0)
939 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
940 }
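/* Worked example (illustrative numbers): if the boot-ID-derived offset comes out as e.g.
 * 23.4s, a wakeup that may be deferred anywhere within the next minute is aligned to
 * second 23.4 of each minute on this machine, while a machine with a different boot ID
 * picks a different offset, so systems with synchronized clocks do not all wake up at the
 * exact same instant. */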
941
942 static int event_setup_timer_fd(
943 sd_event *e,
944 struct clock_data *d,
945 clockid_t clock) {
946
947 struct epoll_event ev;
948 int r, fd;
949
950 assert(e);
951 assert(d);
952
953 if (_likely_(d->fd >= 0))
954 return 0;
955
956 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
957 if (fd < 0)
958 return -errno;
959
960 fd = fd_move_above_stdio(fd);
961
962 ev = (struct epoll_event) {
963 .events = EPOLLIN,
964 .data.ptr = d,
965 };
966
967 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
968 if (r < 0) {
969 safe_close(fd);
970 return -errno;
971 }
972
973 d->fd = fd;
974 return 0;
975 }
976
977 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
978 assert(s);
979
980 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
981 }
982
983 _public_ int sd_event_add_time(
984 sd_event *e,
985 sd_event_source **ret,
986 clockid_t clock,
987 uint64_t usec,
988 uint64_t accuracy,
989 sd_event_time_handler_t callback,
990 void *userdata) {
991
992 EventSourceType type;
993 _cleanup_(source_freep) sd_event_source *s = NULL;
994 struct clock_data *d;
995 int r;
996
997 assert_return(e, -EINVAL);
998 assert_return(e = event_resolve(e), -ENOPKG);
999 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1000 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1001 assert_return(!event_pid_changed(e), -ECHILD);
1002
1003 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1004 return -EOPNOTSUPP;
1005
1006 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1007 if (type < 0)
1008 return -EOPNOTSUPP;
1009
1010 if (!callback)
1011 callback = time_exit_callback;
1012
1013 d = event_get_clock_data(e, type);
1014 assert(d);
1015
1016 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1017 if (r < 0)
1018 return r;
1019
1020 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1021 if (r < 0)
1022 return r;
1023
1024 if (d->fd < 0) {
1025 r = event_setup_timer_fd(e, d, clock);
1026 if (r < 0)
1027 return r;
1028 }
1029
1030 s = source_new(e, !ret, type);
1031 if (!s)
1032 return -ENOMEM;
1033
1034 s->time.next = usec;
1035 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1036 s->time.callback = callback;
1037 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1038 s->userdata = userdata;
1039 s->enabled = SD_EVENT_ONESHOT;
1040
1041 d->needs_rearm = true;
1042
1043 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1044 if (r < 0)
1045 return r;
1046
1047 r = prioq_put(d->latest, s, &s->time.latest_index);
1048 if (r < 0)
1049 return r;
1050
1051 if (ret)
1052 *ret = s;
1053 TAKE_PTR(s);
1054
1055 return 0;
1056 }
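/* Usage sketch (illustrative only): arming a one-shot timer 5 seconds from now on
 * CLOCK_MONOTONIC. sd_event_now() is public API defined further down in this file; "e" and
 * "on_timer" are assumptions for the example.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     uint64_t t;
 *     assert_se(sd_event_now(e, CLOCK_MONOTONIC, &t) >= 0);
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           t + 5 * USEC_PER_SEC,   // absolute trigger time
 *                           0,                      // 0 selects DEFAULT_ACCURACY_USEC
 *                           on_timer, NULL);
 */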
1057
1058 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1059 assert(s);
1060
1061 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1062 }
1063
1064 _public_ int sd_event_add_signal(
1065 sd_event *e,
1066 sd_event_source **ret,
1067 int sig,
1068 sd_event_signal_handler_t callback,
1069 void *userdata) {
1070
1071 _cleanup_(source_freep) sd_event_source *s = NULL;
1072 struct signal_data *d;
1073 sigset_t ss;
1074 int r;
1075
1076 assert_return(e, -EINVAL);
1077 assert_return(e = event_resolve(e), -ENOPKG);
1078 assert_return(SIGNAL_VALID(sig), -EINVAL);
1079 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1080 assert_return(!event_pid_changed(e), -ECHILD);
1081
1082 if (!callback)
1083 callback = signal_exit_callback;
1084
1085 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1086 if (r != 0)
1087 return -r;
1088
1089 if (!sigismember(&ss, sig))
1090 return -EBUSY;
1091
1092 if (!e->signal_sources) {
1093 e->signal_sources = new0(sd_event_source*, _NSIG);
1094 if (!e->signal_sources)
1095 return -ENOMEM;
1096 } else if (e->signal_sources[sig])
1097 return -EBUSY;
1098
1099 s = source_new(e, !ret, SOURCE_SIGNAL);
1100 if (!s)
1101 return -ENOMEM;
1102
1103 s->signal.sig = sig;
1104 s->signal.callback = callback;
1105 s->userdata = userdata;
1106 s->enabled = SD_EVENT_ON;
1107
1108 e->signal_sources[sig] = s;
1109
1110 r = event_make_signal_data(e, sig, &d);
1111 if (r < 0)
1112 return r;
1113
1114 /* Use the signal name as description for the event source by default */
1115 (void) sd_event_source_set_description(s, signal_to_string(sig));
1116
1117 if (ret)
1118 *ret = s;
1119 TAKE_PTR(s);
1120
1121 return 0;
1122 }
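/* Usage sketch (illustrative only): the signal must already be blocked in the calling thread
 * (see the pthread_sigmask() check above), otherwise -EBUSY is returned. A NULL callback
 * installs signal_exit_callback(), i.e. the loop exits when the signal arrives.
 *
 *     sigset_t mask;
 *     assert_se(sigemptyset(&mask) >= 0);
 *     assert_se(sigaddset(&mask, SIGTERM) >= 0);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 */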
1123
1124 _public_ int sd_event_add_child(
1125 sd_event *e,
1126 sd_event_source **ret,
1127 pid_t pid,
1128 int options,
1129 sd_event_child_handler_t callback,
1130 void *userdata) {
1131
1132 _cleanup_(source_freep) sd_event_source *s = NULL;
1133 int r;
1134
1135 assert_return(e, -EINVAL);
1136 assert_return(e = event_resolve(e), -ENOPKG);
1137 assert_return(pid > 1, -EINVAL);
1138 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1139 assert_return(options != 0, -EINVAL);
1140 assert_return(callback, -EINVAL);
1141 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1142 assert_return(!event_pid_changed(e), -ECHILD);
1143
1144 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1145 if (r < 0)
1146 return r;
1147
1148 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1149 return -EBUSY;
1150
1151 s = source_new(e, !ret, SOURCE_CHILD);
1152 if (!s)
1153 return -ENOMEM;
1154
1155 s->child.pid = pid;
1156 s->child.options = options;
1157 s->child.callback = callback;
1158 s->userdata = userdata;
1159 s->enabled = SD_EVENT_ONESHOT;
1160
1161 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1162 if (r < 0)
1163 return r;
1164
1165 e->n_enabled_child_sources++;
1166
1167 r = event_make_signal_data(e, SIGCHLD, NULL);
1168 if (r < 0) {
1169 e->n_enabled_child_sources--;
1170 return r;
1171 }
1172
1173 e->need_process_child = true;
1174
1175 if (ret)
1176 *ret = s;
1177 TAKE_PTR(s);
1178
1179 return 0;
1180 }
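/* Usage sketch (illustrative only): reaping a forked child through the event loop. SIGCHLD
 * has to be blocked in the process for the signalfd-based machinery set up above to see it;
 * "e" and "on_child" are assumptions for the example, error handling is abbreviated.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("child exited with status %i", si->si_status);
 *             return 0;
 *     }
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);   // child does its work and exits
 *     r = sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 */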
1181
1182 _public_ int sd_event_add_defer(
1183 sd_event *e,
1184 sd_event_source **ret,
1185 sd_event_handler_t callback,
1186 void *userdata) {
1187
1188 _cleanup_(source_freep) sd_event_source *s = NULL;
1189 int r;
1190
1191 assert_return(e, -EINVAL);
1192 assert_return(e = event_resolve(e), -ENOPKG);
1193 assert_return(callback, -EINVAL);
1194 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1195 assert_return(!event_pid_changed(e), -ECHILD);
1196
1197 s = source_new(e, !ret, SOURCE_DEFER);
1198 if (!s)
1199 return -ENOMEM;
1200
1201 s->defer.callback = callback;
1202 s->userdata = userdata;
1203 s->enabled = SD_EVENT_ONESHOT;
1204
1205 r = source_set_pending(s, true);
1206 if (r < 0)
1207 return r;
1208
1209 if (ret)
1210 *ret = s;
1211 TAKE_PTR(s);
1212
1213 return 0;
1214 }
1215
1216 _public_ int sd_event_add_post(
1217 sd_event *e,
1218 sd_event_source **ret,
1219 sd_event_handler_t callback,
1220 void *userdata) {
1221
1222 _cleanup_(source_freep) sd_event_source *s = NULL;
1223 int r;
1224
1225 assert_return(e, -EINVAL);
1226 assert_return(e = event_resolve(e), -ENOPKG);
1227 assert_return(callback, -EINVAL);
1228 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1229 assert_return(!event_pid_changed(e), -ECHILD);
1230
1231 r = set_ensure_allocated(&e->post_sources, NULL);
1232 if (r < 0)
1233 return r;
1234
1235 s = source_new(e, !ret, SOURCE_POST);
1236 if (!s)
1237 return -ENOMEM;
1238
1239 s->post.callback = callback;
1240 s->userdata = userdata;
1241 s->enabled = SD_EVENT_ON;
1242
1243 r = set_put(e->post_sources, s);
1244 if (r < 0)
1245 return r;
1246
1247 if (ret)
1248 *ret = s;
1249 TAKE_PTR(s);
1250
1251 return 0;
1252 }
1253
1254 _public_ int sd_event_add_exit(
1255 sd_event *e,
1256 sd_event_source **ret,
1257 sd_event_handler_t callback,
1258 void *userdata) {
1259
1260 _cleanup_(source_freep) sd_event_source *s = NULL;
1261 int r;
1262
1263 assert_return(e, -EINVAL);
1264 assert_return(e = event_resolve(e), -ENOPKG);
1265 assert_return(callback, -EINVAL);
1266 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1267 assert_return(!event_pid_changed(e), -ECHILD);
1268
1269 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1270 if (r < 0)
1271 return r;
1272
1273 s = source_new(e, !ret, SOURCE_EXIT);
1274 if (!s)
1275 return -ENOMEM;
1276
1277 s->exit.callback = callback;
1278 s->userdata = userdata;
1279 s->exit.prioq_index = PRIOQ_IDX_NULL;
1280 s->enabled = SD_EVENT_ONESHOT;
1281
1282 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1283 if (r < 0)
1284 return r;
1285
1286 if (ret)
1287 *ret = s;
1288 TAKE_PTR(s);
1289
1290 return 0;
1291 }
1292
1293 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1294 assert(e);
1295
1296 if (!d)
1297 return;
1298
1299 assert(hashmap_isempty(d->inodes));
1300 assert(hashmap_isempty(d->wd));
1301
1302 if (d->buffer_filled > 0)
1303 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1304
1305 hashmap_free(d->inodes);
1306 hashmap_free(d->wd);
1307
1308 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1309
1310 if (d->fd >= 0) {
1311 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1312 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1313
1314 safe_close(d->fd);
1315 }
1316 free(d);
1317 }
1318
1319 static int event_make_inotify_data(
1320 sd_event *e,
1321 int64_t priority,
1322 struct inotify_data **ret) {
1323
1324 _cleanup_close_ int fd = -1;
1325 struct inotify_data *d;
1326 struct epoll_event ev;
1327 int r;
1328
1329 assert(e);
1330
1331 d = hashmap_get(e->inotify_data, &priority);
1332 if (d) {
1333 if (ret)
1334 *ret = d;
1335 return 0;
1336 }
1337
1338 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1339 if (fd < 0)
1340 return -errno;
1341
1342 fd = fd_move_above_stdio(fd);
1343
1344 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1345 if (r < 0)
1346 return r;
1347
1348 d = new(struct inotify_data, 1);
1349 if (!d)
1350 return -ENOMEM;
1351
1352 *d = (struct inotify_data) {
1353 .wakeup = WAKEUP_INOTIFY_DATA,
1354 .fd = TAKE_FD(fd),
1355 .priority = priority,
1356 };
1357
1358 r = hashmap_put(e->inotify_data, &d->priority, d);
1359 if (r < 0) {
1360 d->fd = safe_close(d->fd);
1361 free(d);
1362 return r;
1363 }
1364
1365 ev = (struct epoll_event) {
1366 .events = EPOLLIN,
1367 .data.ptr = d,
1368 };
1369
1370 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1371 r = -errno;
1372 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1373 * remove the fd from the epoll first, which we don't want as we couldn't
1374 * add it in the first place. */
1375 event_free_inotify_data(e, d);
1376 return r;
1377 }
1378
1379 if (ret)
1380 *ret = d;
1381
1382 return 1;
1383 }
1384
1385 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1386 int r;
1387
1388 assert(x);
1389 assert(y);
1390
1391 r = CMP(x->dev, y->dev);
1392 if (r != 0)
1393 return r;
1394
1395 return CMP(x->ino, y->ino);
1396 }
1397
1398 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1399 assert(d);
1400
1401 siphash24_compress(&d->dev, sizeof(d->dev), state);
1402 siphash24_compress(&d->ino, sizeof(d->ino), state);
1403 }
1404
1405 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1406
1407 static void event_free_inode_data(
1408 sd_event *e,
1409 struct inode_data *d) {
1410
1411 assert(e);
1412
1413 if (!d)
1414 return;
1415
1416 assert(!d->event_sources);
1417
1418 if (d->fd >= 0) {
1419 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1420 safe_close(d->fd);
1421 }
1422
1423 if (d->inotify_data) {
1424
1425 if (d->wd >= 0) {
1426 if (d->inotify_data->fd >= 0) {
1427 /* So here's a problem. At the time this runs the watch descriptor might already be
1428 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1429 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's quite
1430 * likely to happen. */
1431
1432 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1433 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1434 }
1435
1436 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1437 }
1438
1439 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1440 }
1441
1442 free(d);
1443 }
1444
1445 static void event_gc_inode_data(
1446 sd_event *e,
1447 struct inode_data *d) {
1448
1449 struct inotify_data *inotify_data;
1450
1451 assert(e);
1452
1453 if (!d)
1454 return;
1455
1456 if (d->event_sources)
1457 return;
1458
1459 inotify_data = d->inotify_data;
1460 event_free_inode_data(e, d);
1461
1462 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1463 event_free_inotify_data(e, inotify_data);
1464 }
1465
1466 static int event_make_inode_data(
1467 sd_event *e,
1468 struct inotify_data *inotify_data,
1469 dev_t dev,
1470 ino_t ino,
1471 struct inode_data **ret) {
1472
1473 struct inode_data *d, key;
1474 int r;
1475
1476 assert(e);
1477 assert(inotify_data);
1478
1479 key = (struct inode_data) {
1480 .ino = ino,
1481 .dev = dev,
1482 };
1483
1484 d = hashmap_get(inotify_data->inodes, &key);
1485 if (d) {
1486 if (ret)
1487 *ret = d;
1488
1489 return 0;
1490 }
1491
1492 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1493 if (r < 0)
1494 return r;
1495
1496 d = new(struct inode_data, 1);
1497 if (!d)
1498 return -ENOMEM;
1499
1500 *d = (struct inode_data) {
1501 .dev = dev,
1502 .ino = ino,
1503 .wd = -1,
1504 .fd = -1,
1505 .inotify_data = inotify_data,
1506 };
1507
1508 r = hashmap_put(inotify_data->inodes, d, d);
1509 if (r < 0) {
1510 free(d);
1511 return r;
1512 }
1513
1514 if (ret)
1515 *ret = d;
1516
1517 return 1;
1518 }
1519
1520 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1521 bool excl_unlink = true;
1522 uint32_t combined = 0;
1523 sd_event_source *s;
1524
1525 assert(d);
1526
1527 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1528 * the IN_EXCL_UNLINK flag is ANDed instead.
1529 *
1530 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1531 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1532 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1533 * events we don't care for client-side. */
1534
1535 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1536
1537 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1538 excl_unlink = false;
1539
1540 combined |= s->inotify.mask;
1541 }
1542
1543 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1544 }
1545
1546 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1547 uint32_t combined_mask;
1548 int wd, r;
1549
1550 assert(d);
1551 assert(d->fd >= 0);
1552
1553 combined_mask = inode_data_determine_mask(d);
1554
1555 if (d->wd >= 0 && combined_mask == d->combined_mask)
1556 return 0;
1557
1558 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1559 if (r < 0)
1560 return r;
1561
1562 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1563 if (wd < 0)
1564 return -errno;
1565
1566 if (d->wd < 0) {
1567 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1568 if (r < 0) {
1569 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1570 return r;
1571 }
1572
1573 d->wd = wd;
1574
1575 } else if (d->wd != wd) {
1576
1577 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1578 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1579 return -EINVAL;
1580 }
1581
1582 d->combined_mask = combined_mask;
1583 return 1;
1584 }
1585
1586 _public_ int sd_event_add_inotify(
1587 sd_event *e,
1588 sd_event_source **ret,
1589 const char *path,
1590 uint32_t mask,
1591 sd_event_inotify_handler_t callback,
1592 void *userdata) {
1593
1594 struct inotify_data *inotify_data = NULL;
1595 struct inode_data *inode_data = NULL;
1596 _cleanup_close_ int fd = -1;
1597 _cleanup_(source_freep) sd_event_source *s = NULL;
1598 struct stat st;
1599 int r;
1600
1601 assert_return(e, -EINVAL);
1602 assert_return(e = event_resolve(e), -ENOPKG);
1603 assert_return(path, -EINVAL);
1604 assert_return(callback, -EINVAL);
1605 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1606 assert_return(!event_pid_changed(e), -ECHILD);
1607
1608 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1609 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1610 * the caller cannot pass IN_MASK_ADD in themselves. */
1611 if (mask & IN_MASK_ADD)
1612 return -EINVAL;
1613
1614 fd = open(path, O_PATH|O_CLOEXEC|
1615 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1616 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1617 if (fd < 0)
1618 return -errno;
1619
1620 if (fstat(fd, &st) < 0)
1621 return -errno;
1622
1623 s = source_new(e, !ret, SOURCE_INOTIFY);
1624 if (!s)
1625 return -ENOMEM;
1626
1627 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1628 s->inotify.mask = mask;
1629 s->inotify.callback = callback;
1630 s->userdata = userdata;
1631
1632 /* Allocate an inotify object for this priority, and an inode object within it */
1633 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1634 if (r < 0)
1635 return r;
1636
1637 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1638 if (r < 0) {
1639 event_free_inotify_data(e, inotify_data);
1640 return r;
1641 }
1642
1643 /* Keep the O_PATH fd around until the first iteration of the loop, so that until then we can still change
1644 * the priority of the event source, for which we need the original inode. */
1645 if (inode_data->fd < 0) {
1646 inode_data->fd = TAKE_FD(fd);
1647 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1648 }
1649
1650 /* Link our event source to the inode data object */
1651 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1652 s->inotify.inode_data = inode_data;
1653
1654 /* Actually realize the watch now */
1655 r = inode_data_realize_watch(e, inode_data);
1656 if (r < 0)
1657 return r;
1658
1659 (void) sd_event_source_set_description(s, path);
1660
1661 if (ret)
1662 *ret = s;
1663 TAKE_PTR(s);
1664
1665 return 0;
1666 }
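/* Usage sketch (illustrative only): watching a directory for newly created files. The path
 * and mask are assumptions for the example; IN_MASK_ADD is refused above because watches on
 * the same inode are coalesced by this code.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_debug("inotify event on %s", ev->len > 0 ? ev->name : "(watched inode)");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/some-dir", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 */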
1667
1668 static sd_event_source* event_source_free(sd_event_source *s) {
1669 if (!s)
1670 return NULL;
1671
1672 /* Here's a special hack: when we are called from a
1673 * dispatch handler we won't free the event source
1674 * immediately, but we will detach the fd from the
1675 * epoll. This way it is safe for the caller to unref
1676 * the event source and immediately close the fd, but
1677 * we still retain a valid event source object after
1678 * the callback. */
1679
1680 if (s->dispatching) {
1681 if (s->type == SOURCE_IO)
1682 source_io_unregister(s);
1683
1684 source_disconnect(s);
1685 } else
1686 source_free(s);
1687
1688 return NULL;
1689 }
1690
1691 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1692
1693 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1694 assert_return(s, -EINVAL);
1695 assert_return(!event_pid_changed(s->event), -ECHILD);
1696
1697 return free_and_strdup(&s->description, description);
1698 }
1699
1700 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1701 assert_return(s, -EINVAL);
1702 assert_return(description, -EINVAL);
1703 assert_return(!event_pid_changed(s->event), -ECHILD);
1704
1705 if (!s->description)
1706 return -ENXIO;
1707
1708 *description = s->description;
1709 return 0;
1710 }
1711
1712 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1713 assert_return(s, NULL);
1714
1715 return s->event;
1716 }
1717
1718 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1719 assert_return(s, -EINVAL);
1720 assert_return(s->type != SOURCE_EXIT, -EDOM);
1721 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1722 assert_return(!event_pid_changed(s->event), -ECHILD);
1723
1724 return s->pending;
1725 }
1726
1727 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1728 assert_return(s, -EINVAL);
1729 assert_return(s->type == SOURCE_IO, -EDOM);
1730 assert_return(!event_pid_changed(s->event), -ECHILD);
1731
1732 return s->io.fd;
1733 }
1734
1735 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1736 int r;
1737
1738 assert_return(s, -EINVAL);
1739 assert_return(fd >= 0, -EBADF);
1740 assert_return(s->type == SOURCE_IO, -EDOM);
1741 assert_return(!event_pid_changed(s->event), -ECHILD);
1742
1743 if (s->io.fd == fd)
1744 return 0;
1745
1746 if (s->enabled == SD_EVENT_OFF) {
1747 s->io.fd = fd;
1748 s->io.registered = false;
1749 } else {
1750 int saved_fd;
1751
1752 saved_fd = s->io.fd;
1753 assert(s->io.registered);
1754
1755 s->io.fd = fd;
1756 s->io.registered = false;
1757
1758 r = source_io_register(s, s->enabled, s->io.events);
1759 if (r < 0) {
1760 s->io.fd = saved_fd;
1761 s->io.registered = true;
1762 return r;
1763 }
1764
1765 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1766 }
1767
1768 return 0;
1769 }
1770
1771 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1772 assert_return(s, -EINVAL);
1773 assert_return(s->type == SOURCE_IO, -EDOM);
1774
1775 return s->io.owned;
1776 }
1777
1778 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1779 assert_return(s, -EINVAL);
1780 assert_return(s->type == SOURCE_IO, -EDOM);
1781
1782 s->io.owned = own;
1783 return 0;
1784 }
1785
1786 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1787 assert_return(s, -EINVAL);
1788 assert_return(events, -EINVAL);
1789 assert_return(s->type == SOURCE_IO, -EDOM);
1790 assert_return(!event_pid_changed(s->event), -ECHILD);
1791
1792 *events = s->io.events;
1793 return 0;
1794 }
1795
1796 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1797 int r;
1798
1799 assert_return(s, -EINVAL);
1800 assert_return(s->type == SOURCE_IO, -EDOM);
1801 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1802 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1803 assert_return(!event_pid_changed(s->event), -ECHILD);
1804
1805 /* edge-triggered updates are never skipped, so we can reset edges */
1806 if (s->io.events == events && !(events & EPOLLET))
1807 return 0;
1808
1809 r = source_set_pending(s, false);
1810 if (r < 0)
1811 return r;
1812
1813 if (s->enabled != SD_EVENT_OFF) {
1814 r = source_io_register(s, s->enabled, events);
1815 if (r < 0)
1816 return r;
1817 }
1818
1819 s->io.events = events;
1820
1821 return 0;
1822 }
1823
1824 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1825 assert_return(s, -EINVAL);
1826 assert_return(revents, -EINVAL);
1827 assert_return(s->type == SOURCE_IO, -EDOM);
1828 assert_return(s->pending, -ENODATA);
1829 assert_return(!event_pid_changed(s->event), -ECHILD);
1830
1831 *revents = s->io.revents;
1832 return 0;
1833 }
1834
1835 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1836 assert_return(s, -EINVAL);
1837 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1838 assert_return(!event_pid_changed(s->event), -ECHILD);
1839
1840 return s->signal.sig;
1841 }
1842
1843 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1844 assert_return(s, -EINVAL);
1845 assert_return(!event_pid_changed(s->event), -ECHILD);
1846
1847 *priority = s->priority;
1848 return 0;
1849 }
1850
1851 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1852 bool rm_inotify = false, rm_inode = false;
1853 struct inotify_data *new_inotify_data = NULL;
1854 struct inode_data *new_inode_data = NULL;
1855 int r;
1856
1857 assert_return(s, -EINVAL);
1858 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1859 assert_return(!event_pid_changed(s->event), -ECHILD);
1860
1861 if (s->priority == priority)
1862 return 0;
1863
1864 if (s->type == SOURCE_INOTIFY) {
1865 struct inode_data *old_inode_data;
1866
1867 assert(s->inotify.inode_data);
1868 old_inode_data = s->inotify.inode_data;
1869
1870 /* We need the original fd to change the priority. If we don't have it we can't change the priority
1871 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1872 * events we allow priority changes only until the first following iteration. */
1873 if (old_inode_data->fd < 0)
1874 return -EOPNOTSUPP;
1875
1876 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1877 if (r < 0)
1878 return r;
1879 rm_inotify = r > 0;
1880
1881 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1882 if (r < 0)
1883 goto fail;
1884 rm_inode = r > 0;
1885
1886 if (new_inode_data->fd < 0) {
1887 /* Duplicate the fd for the new inode object if we don't have any yet */
1888 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1889 if (new_inode_data->fd < 0) {
1890 r = -errno;
1891 goto fail;
1892 }
1893
1894 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1895 }
1896
1897 /* Move the event source to the new inode data structure */
1898 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1899 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1900 s->inotify.inode_data = new_inode_data;
1901
1902 /* Now create the new watch */
1903 r = inode_data_realize_watch(s->event, new_inode_data);
1904 if (r < 0) {
1905 /* Move it back */
1906 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1907 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1908 s->inotify.inode_data = old_inode_data;
1909 goto fail;
1910 }
1911
1912 s->priority = priority;
1913
1914 event_gc_inode_data(s->event, old_inode_data);
1915
1916 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1917 struct signal_data *old, *d;
1918
1919 /* Move us from the signalfd belonging to the old
1920 * priority to the signalfd of the new priority */
1921
1922 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1923
1924 s->priority = priority;
1925
1926 r = event_make_signal_data(s->event, s->signal.sig, &d);
1927 if (r < 0) {
1928 s->priority = old->priority;
1929 return r;
1930 }
1931
1932 event_unmask_signal_data(s->event, old, s->signal.sig);
1933 } else
1934 s->priority = priority;
1935
1936 if (s->pending)
1937 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1938
1939 if (s->prepare)
1940 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1941
1942 if (s->type == SOURCE_EXIT)
1943 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1944
1945 return 0;
1946
1947 fail:
1948 if (rm_inode)
1949 event_free_inode_data(s->event, new_inode_data);
1950
1951 if (rm_inotify)
1952 event_free_inotify_data(s->event, new_inotify_data);
1953
1954 return r;
1955 }
1956
1957 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1958 assert_return(s, -EINVAL);
1959 assert_return(!event_pid_changed(s->event), -ECHILD);
1960
1961 if (m)
1962 *m = s->enabled;
1963 return s->enabled != SD_EVENT_OFF;
1964 }
1965
1966 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1967 int r;
1968
1969 assert_return(s, -EINVAL);
1970 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1971 assert_return(!event_pid_changed(s->event), -ECHILD);
1972
1973 /* If we are dead anyway, we are fine with turning off
1974 * sources, but everything else needs to fail. */
1975 if (s->event->state == SD_EVENT_FINISHED)
1976 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1977
1978 if (s->enabled == m)
1979 return 0;
1980
1981 if (m == SD_EVENT_OFF) {
1982
1983 /* Unset the pending flag when this event source is disabled */
1984 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1985 r = source_set_pending(s, false);
1986 if (r < 0)
1987 return r;
1988 }
1989
1990 switch (s->type) {
1991
1992 case SOURCE_IO:
1993 source_io_unregister(s);
1994 s->enabled = m;
1995 break;
1996
1997 case SOURCE_TIME_REALTIME:
1998 case SOURCE_TIME_BOOTTIME:
1999 case SOURCE_TIME_MONOTONIC:
2000 case SOURCE_TIME_REALTIME_ALARM:
2001 case SOURCE_TIME_BOOTTIME_ALARM: {
2002 struct clock_data *d;
2003
2004 s->enabled = m;
2005 d = event_get_clock_data(s->event, s->type);
2006 assert(d);
2007
2008 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2009 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2010 d->needs_rearm = true;
2011 break;
2012 }
2013
2014 case SOURCE_SIGNAL:
2015 s->enabled = m;
2016
2017 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2018 break;
2019
2020 case SOURCE_CHILD:
2021 s->enabled = m;
2022
2023 assert(s->event->n_enabled_child_sources > 0);
2024 s->event->n_enabled_child_sources--;
2025
2026 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2027 break;
2028
2029 case SOURCE_EXIT:
2030 s->enabled = m;
2031 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2032 break;
2033
2034 case SOURCE_DEFER:
2035 case SOURCE_POST:
2036 case SOURCE_INOTIFY:
2037 s->enabled = m;
2038 break;
2039
2040 default:
2041 assert_not_reached("Wut? I shouldn't exist.");
2042 }
2043
2044 } else {
2045
2046 /* Unset the pending flag when this event source is enabled */
2047 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2048 r = source_set_pending(s, false);
2049 if (r < 0)
2050 return r;
2051 }
2052
2053 switch (s->type) {
2054
2055 case SOURCE_IO:
2056 r = source_io_register(s, m, s->io.events);
2057 if (r < 0)
2058 return r;
2059
2060 s->enabled = m;
2061 break;
2062
2063 case SOURCE_TIME_REALTIME:
2064 case SOURCE_TIME_BOOTTIME:
2065 case SOURCE_TIME_MONOTONIC:
2066 case SOURCE_TIME_REALTIME_ALARM:
2067 case SOURCE_TIME_BOOTTIME_ALARM: {
2068 struct clock_data *d;
2069
2070 s->enabled = m;
2071 d = event_get_clock_data(s->event, s->type);
2072 assert(d);
2073
2074 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2075 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2076 d->needs_rearm = true;
2077 break;
2078 }
2079
2080 case SOURCE_SIGNAL:
2081
2082 s->enabled = m;
2083
2084 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2085 if (r < 0) {
2086 s->enabled = SD_EVENT_OFF;
2087 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2088 return r;
2089 }
2090
2091 break;
2092
2093 case SOURCE_CHILD:
2094
2095 if (s->enabled == SD_EVENT_OFF)
2096 s->event->n_enabled_child_sources++;
2097
2098 s->enabled = m;
2099
2100 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2101 if (r < 0) {
2102 s->enabled = SD_EVENT_OFF;
2103 s->event->n_enabled_child_sources--;
2104 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2105 return r;
2106 }
2107
2108 break;
2109
2110 case SOURCE_EXIT:
2111 s->enabled = m;
2112 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2113 break;
2114
2115 case SOURCE_DEFER:
2116 case SOURCE_POST:
2117 case SOURCE_INOTIFY:
2118 s->enabled = m;
2119 break;
2120
2121 default:
2122 assert_not_reached("Wut? I shouldn't exist.");
2123 }
2124 }
2125
2126 if (s->pending)
2127 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2128
2129 if (s->prepare)
2130 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2131
2132 return 0;
2133 }
2134
2135 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2136 assert_return(s, -EINVAL);
2137 assert_return(usec, -EINVAL);
2138 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2139 assert_return(!event_pid_changed(s->event), -ECHILD);
2140
2141 *usec = s->time.next;
2142 return 0;
2143 }
2144
2145 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2146 struct clock_data *d;
2147 int r;
2148
2149 assert_return(s, -EINVAL);
2150 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2151 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2152 assert_return(!event_pid_changed(s->event), -ECHILD);
2153
2154 r = source_set_pending(s, false);
2155 if (r < 0)
2156 return r;
2157
2158 s->time.next = usec;
2159
2160 d = event_get_clock_data(s->event, s->type);
2161 assert(d);
2162
2163 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2164 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2165 d->needs_rearm = true;
2166
2167 return 0;
2168 }
2169
2170 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2171 assert_return(s, -EINVAL);
2172 assert_return(usec, -EINVAL);
2173 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2174 assert_return(!event_pid_changed(s->event), -ECHILD);
2175
2176 *usec = s->time.accuracy;
2177 return 0;
2178 }
2179
2180 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2181 struct clock_data *d;
2182 int r;
2183
2184 assert_return(s, -EINVAL);
2185 assert_return(usec != (uint64_t) -1, -EINVAL);
2186 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2187 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2188 assert_return(!event_pid_changed(s->event), -ECHILD);
2189
2190 r = source_set_pending(s, false);
2191 if (r < 0)
2192 return r;
2193
2194 if (usec == 0)
2195 usec = DEFAULT_ACCURACY_USEC;
2196
2197 s->time.accuracy = usec;
2198
2199 d = event_get_clock_data(s->event, s->type);
2200 assert(d);
2201
2202 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2203 d->needs_rearm = true;
2204
2205 return 0;
2206 }
2207
2208 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2209 assert_return(s, -EINVAL);
2210 assert_return(clock, -EINVAL);
2211 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2212 assert_return(!event_pid_changed(s->event), -ECHILD);
2213
2214 *clock = event_source_type_to_clock(s->type);
2215 return 0;
2216 }
2217
2218 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2219 assert_return(s, -EINVAL);
2220 assert_return(pid, -EINVAL);
2221 assert_return(s->type == SOURCE_CHILD, -EDOM);
2222 assert_return(!event_pid_changed(s->event), -ECHILD);
2223
2224 *pid = s->child.pid;
2225 return 0;
2226 }
2227
2228 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2229 assert_return(s, -EINVAL);
2230 assert_return(mask, -EINVAL);
2231 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2232 assert_return(!event_pid_changed(s->event), -ECHILD);
2233
2234 *mask = s->inotify.mask;
2235 return 0;
2236 }
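/* Illustrative sketch (not part of the original source): inotify sources like the one queried
 * above are created with sd_event_add_inotify(). A hypothetical watch on a directory might be
 * set up roughly like this (path and names are made up, error handling elided):
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             log_info("inotify event with mask 0x%x", (unsigned) ev->mask);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &src, "/run/mydir", IN_CREATE|IN_DELETE, on_inotify, NULL);
 */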
2237
2238 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2239 int r;
2240
2241 assert_return(s, -EINVAL);
2242 assert_return(s->type != SOURCE_EXIT, -EDOM);
2243 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2244 assert_return(!event_pid_changed(s->event), -ECHILD);
2245
2246 if (s->prepare == callback)
2247 return 0;
2248
2249 if (callback && s->prepare) {
2250 s->prepare = callback;
2251 return 0;
2252 }
2253
2254 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2255 if (r < 0)
2256 return r;
2257
2258 s->prepare = callback;
2259
2260 if (callback) {
2261 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2262 if (r < 0)
2263 return r;
2264 } else
2265 prioq_remove(s->event->prepare, s, &s->prepare_index);
2266
2267 return 0;
2268 }
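/* Illustrative sketch (not part of the original source): a prepare callback runs right before
 * the loop polls, which makes it a convenient place to refresh state that influences the next
 * iteration. Names are hypothetical, error handling elided:
 *
 *     static int my_prepare(sd_event_source *s, void *userdata) {
 *             // e.g. recompute and re-arm the next wakeup for this source
 *             return 0;
 *     }
 *
 *     r = sd_event_source_set_prepare(src, my_prepare);
 */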
2269
2270 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2271 assert_return(s, NULL);
2272
2273 return s->userdata;
2274 }
2275
2276 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2277 void *ret;
2278
2279 assert_return(s, NULL);
2280
2281 ret = s->userdata;
2282 s->userdata = userdata;
2283
2284 return ret;
2285 }
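/* Illustrative sketch (not part of the original source): the userdata pointer set here is what
 * each callback of the source receives as its last argument, so swapping it retargets a
 * long-lived source; the previous pointer is returned ("struct my_ctx" is hypothetical):
 *
 *     struct my_ctx *old = sd_event_source_set_userdata(src, new_ctx);
 */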
2286
2287 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2288 usec_t c;
2289 assert(e);
2290 assert(a <= b);
2291
2292 if (a <= 0)
2293 return 0;
2294 if (a >= USEC_INFINITY)
2295 return USEC_INFINITY;
2296
2297 if (b <= a + 1)
2298 return a;
2299
2300 initialize_perturb(e);
2301
2302 /*
2303 Find a good time to wake up again between times a and b. We
2304 have two goals here:
2305
2306 a) We want to wake up as seldom as possible, hence prefer
2307 later times over earlier times.
2308
2309 b) But if we have to wake up, then let's make sure to
2310 dispatch as much as possible on the entire system.
2311
2312 We implement this by waking up everywhere at the same time
2313 within any given minute if we can, synchronised via the
2314 perturbation value determined from the boot ID. If we can't,
2315 then we try to find the same spot in every 10s, then 1s and
2316 then 250ms steps. Otherwise, we pick the last possible time
2317 to wake up.
2318 */
2319
2320 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2321 if (c >= b) {
2322 if (_unlikely_(c < USEC_PER_MINUTE))
2323 return b;
2324
2325 c -= USEC_PER_MINUTE;
2326 }
2327
2328 if (c >= a)
2329 return c;
2330
2331 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2332 if (c >= b) {
2333 if (_unlikely_(c < USEC_PER_SEC*10))
2334 return b;
2335
2336 c -= USEC_PER_SEC*10;
2337 }
2338
2339 if (c >= a)
2340 return c;
2341
2342 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2343 if (c >= b) {
2344 if (_unlikely_(c < USEC_PER_SEC))
2345 return b;
2346
2347 c -= USEC_PER_SEC;
2348 }
2349
2350 if (c >= a)
2351 return c;
2352
2353 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2354 if (c >= b) {
2355 if (_unlikely_(c < USEC_PER_MSEC*250))
2356 return b;
2357
2358 c -= USEC_PER_MSEC*250;
2359 }
2360
2361 if (c >= a)
2362 return c;
2363
2364 return b;
2365 }
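/* Worked example (added for illustration, not part of the original source): assume
 * e->perturb == 7 * USEC_PER_SEC and a timer may fire anywhere in [a, b) = [100s, 200s).
 * The first candidate is the start of b's minute plus the perturbation:
 * (200s / 60s) * 60s + 7s = 187s. That lies within [100s, 200s), so 187s is returned, and
 * every loop deriving the same perturbation from the boot ID coalesces onto the same
 * per-minute spot. Only if the candidate fell outside the window would the 10s, 1s and
 * 250ms granularities be tried in turn. */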
2366
2367 static int event_arm_timer(
2368 sd_event *e,
2369 struct clock_data *d) {
2370
2371 struct itimerspec its = {};
2372 sd_event_source *a, *b;
2373 usec_t t;
2374 int r;
2375
2376 assert(e);
2377 assert(d);
2378
2379 if (!d->needs_rearm)
2380 return 0;
2381 else
2382 d->needs_rearm = false;
2383
2384 a = prioq_peek(d->earliest);
2385 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2386
2387 if (d->fd < 0)
2388 return 0;
2389
2390 if (d->next == USEC_INFINITY)
2391 return 0;
2392
2393 /* disarm */
2394 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2395 if (r < 0)
2396 return r;
2397
2398 d->next = USEC_INFINITY;
2399 return 0;
2400 }
2401
2402 b = prioq_peek(d->latest);
2403 assert_se(b && b->enabled != SD_EVENT_OFF);
2404
2405 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2406 if (d->next == t)
2407 return 0;
2408
2409 assert_se(d->fd >= 0);
2410
2411 if (t == 0) {
2412 /* We don't want to disarm here, so just arm the timer for some time long ago. */
2413 its.it_value.tv_sec = 0;
2414 its.it_value.tv_nsec = 1;
2415 } else
2416 timespec_store(&its.it_value, t);
2417
2418 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2419 if (r < 0)
2420 return -errno;
2421
2422 d->next = t;
2423 return 0;
2424 }
2425
2426 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2427 assert(e);
2428 assert(s);
2429 assert(s->type == SOURCE_IO);
2430
2431 /* If the event source was already pending, we just OR in the
2432 * new revents, otherwise we reset the value. The ORing is
2433 * necessary to handle EPOLLONESHOT events properly where
2434 * readability might happen independently of writability, and
2435 * we need to keep track of both */
2436
2437 if (s->pending)
2438 s->io.revents |= revents;
2439 else
2440 s->io.revents = revents;
2441
2442 return source_set_pending(s, true);
2443 }
2444
2445 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2446 uint64_t x;
2447 ssize_t ss;
2448
2449 assert(e);
2450 assert(fd >= 0);
2451
2452 assert_return(events == EPOLLIN, -EIO);
2453
2454 ss = read(fd, &x, sizeof(x));
2455 if (ss < 0) {
2456 if (IN_SET(errno, EAGAIN, EINTR))
2457 return 0;
2458
2459 return -errno;
2460 }
2461
2462 if (_unlikely_(ss != sizeof(x)))
2463 return -EIO;
2464
2465 if (next)
2466 *next = USEC_INFINITY;
2467
2468 return 0;
2469 }
2470
2471 static int process_timer(
2472 sd_event *e,
2473 usec_t n,
2474 struct clock_data *d) {
2475
2476 sd_event_source *s;
2477 int r;
2478
2479 assert(e);
2480 assert(d);
2481
2482 for (;;) {
2483 s = prioq_peek(d->earliest);
2484 if (!s ||
2485 s->time.next > n ||
2486 s->enabled == SD_EVENT_OFF ||
2487 s->pending)
2488 break;
2489
2490 r = source_set_pending(s, true);
2491 if (r < 0)
2492 return r;
2493
2494 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2495 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2496 d->needs_rearm = true;
2497 }
2498
2499 return 0;
2500 }
2501
2502 static int process_child(sd_event *e) {
2503 sd_event_source *s;
2504 Iterator i;
2505 int r;
2506
2507 assert(e);
2508
2509 e->need_process_child = false;
2510
2511 /*
2512 So, this is ugly. We iteratively invoke waitid() with P_PID
2513 + WNOHANG for each PID we wait for, instead of using
2514 P_ALL. This is because we only want to get child
2515 information of very specific child processes, and not all
2516 of them. We might not have processed the SIGCHLD event of a
2517 previous invocation and we don't want to maintain an
2518 unbounded *per-child* event queue, hence we really don't
2519 want anything flushed out of the kernel's queue that we
2520 don't care about. Since this is O(n) this means that if you
2521 have a lot of processes you probably want to handle SIGCHLD
2522 yourself.
2523
2524 We do not reap the children here (hence WNOWAIT); they are
2525 reaped only after the event source has been dispatched, so
2526 that the callback still sees the process as a zombie.
2527 */
2528
2529 HASHMAP_FOREACH(s, e->child_sources, i) {
2530 assert(s->type == SOURCE_CHILD);
2531
2532 if (s->pending)
2533 continue;
2534
2535 if (s->enabled == SD_EVENT_OFF)
2536 continue;
2537
2538 zero(s->child.siginfo);
2539 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2540 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2541 if (r < 0)
2542 return -errno;
2543
2544 if (s->child.siginfo.si_pid != 0) {
2545 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2546
2547 if (!zombie && (s->child.options & WEXITED)) {
2548 /* If the child isn't dead then let's
2549 * immediately remove the state change
2550 * from the queue, since there's no
2551 * benefit in leaving it queued */
2552
2553 assert(s->child.options & (WSTOPPED|WCONTINUED));
2554 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2555 }
2556
2557 r = source_set_pending(s, true);
2558 if (r < 0)
2559 return r;
2560 }
2561 }
2562
2563 return 0;
2564 }
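/* Illustrative sketch (not part of the original source): the O(n) waitid() scan above iterates
 * over child sources that callers register per PID; with very many children it may be cheaper
 * to handle SIGCHLD directly. A hypothetical per-PID registration (SIGCHLD must be blocked
 * before sd_event_add_child() is called, error handling elided):
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_info("child %i changed state (si_code=%i)", (int) si->si_pid, si->si_code);
 *             return 0;
 *     }
 *
 *     r = sd_event_add_child(e, &src, child_pid, WEXITED, on_child, NULL);
 */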
2565
2566 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2567 bool read_one = false;
2568 int r;
2569
2570 assert(e);
2571 assert(d);
2572 assert_return(events == EPOLLIN, -EIO);
2573
2574 /* If there's a signal queued on this priority and SIGCHLD is
2575 on this priority too, then make sure to recheck the
2576 children we watch. This is because we only ever dequeue
2577 the first signal per priority, and if SIGCHLD is queued
2578 behind the one we dequeue we wouldn't notice it, even though
2579 we might have higher-priority children we care about. Hence
2580 we check for them explicitly. */
2581
2582 if (sigismember(&d->sigset, SIGCHLD))
2583 e->need_process_child = true;
2584
2585 /* If there's already an event source pending for this
2586 * priority we don't read another */
2587 if (d->current)
2588 return 0;
2589
2590 for (;;) {
2591 struct signalfd_siginfo si;
2592 ssize_t n;
2593 sd_event_source *s = NULL;
2594
2595 n = read(d->fd, &si, sizeof(si));
2596 if (n < 0) {
2597 if (IN_SET(errno, EAGAIN, EINTR))
2598 return read_one;
2599
2600 return -errno;
2601 }
2602
2603 if (_unlikely_(n != sizeof(si)))
2604 return -EIO;
2605
2606 assert(SIGNAL_VALID(si.ssi_signo));
2607
2608 read_one = true;
2609
2610 if (e->signal_sources)
2611 s = e->signal_sources[si.ssi_signo];
2612 if (!s)
2613 continue;
2614 if (s->pending)
2615 continue;
2616
2617 s->signal.siginfo = si;
2618 d->current = s;
2619
2620 r = source_set_pending(s, true);
2621 if (r < 0)
2622 return r;
2623
2624 return 1;
2625 }
2626 }
2627
2628 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2629 ssize_t n;
2630
2631 assert(e);
2632 assert(d);
2633
2634 assert_return(revents == EPOLLIN, -EIO);
2635
2636 /* If there's already an event source pending for this priority, don't read another */
2637 if (d->n_pending > 0)
2638 return 0;
2639
2640 /* Is the read buffer non-empty? If so, let's not read more */
2641 if (d->buffer_filled > 0)
2642 return 0;
2643
2644 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2645 if (n < 0) {
2646 if (IN_SET(errno, EAGAIN, EINTR))
2647 return 0;
2648
2649 return -errno;
2650 }
2651
2652 assert(n > 0);
2653 d->buffer_filled = (size_t) n;
2654 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2655
2656 return 1;
2657 }
2658
2659 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2660 assert(e);
2661 assert(d);
2662 assert(sz <= d->buffer_filled);
2663
2664 if (sz == 0)
2665 return;
2666
2667 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2668 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2669 d->buffer_filled -= sz;
2670
2671 if (d->buffer_filled == 0)
2672 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2673 }
2674
2675 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2676 int r;
2677
2678 assert(e);
2679 assert(d);
2680
2681 /* If there's already an event source pending for this priority, don't read another */
2682 if (d->n_pending > 0)
2683 return 0;
2684
2685 while (d->buffer_filled > 0) {
2686 size_t sz;
2687
2688 /* Let's validate that the event structures are complete */
2689 if (d->buffer_filled < offsetof(struct inotify_event, name))
2690 return -EIO;
2691
2692 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2693 if (d->buffer_filled < sz)
2694 return -EIO;
2695
2696 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2697 struct inode_data *inode_data;
2698 Iterator i;
2699
2700 /* The queue overran, let's pass this event to all event sources connected to this inotify
2701 * object */
2702
2703 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2704 sd_event_source *s;
2705
2706 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2707
2708 if (s->enabled == SD_EVENT_OFF)
2709 continue;
2710
2711 r = source_set_pending(s, true);
2712 if (r < 0)
2713 return r;
2714 }
2715 }
2716 } else {
2717 struct inode_data *inode_data;
2718 sd_event_source *s;
2719
2720 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2721 * our watch descriptor table. */
2722 if (d->buffer.ev.mask & IN_IGNORED) {
2723
2724 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2725 if (!inode_data) {
2726 event_inotify_data_drop(e, d, sz);
2727 continue;
2728 }
2729
2730 /* The watch descriptor was removed by the kernel, let's drop it here too */
2731 inode_data->wd = -1;
2732 } else {
2733 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2734 if (!inode_data) {
2735 event_inotify_data_drop(e, d, sz);
2736 continue;
2737 }
2738 }
2739
2740 /* Trigger all event sources that are interested in these events. Also trigger all event
2741 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2742 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2743
2744 if (s->enabled == SD_EVENT_OFF)
2745 continue;
2746
2747 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2748 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2749 continue;
2750
2751 r = source_set_pending(s, true);
2752 if (r < 0)
2753 return r;
2754 }
2755 }
2756
2757 /* Something pending now? If so, let's finish, otherwise let's read more. */
2758 if (d->n_pending > 0)
2759 return 1;
2760 }
2761
2762 return 0;
2763 }
2764
2765 static int process_inotify(sd_event *e) {
2766 struct inotify_data *d;
2767 int r, done = 0;
2768
2769 assert(e);
2770
2771 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2772 r = event_inotify_data_process(e, d);
2773 if (r < 0)
2774 return r;
2775 if (r > 0)
2776 done++;
2777 }
2778
2779 return done;
2780 }
2781
2782 static int source_dispatch(sd_event_source *s) {
2783 EventSourceType saved_type;
2784 int r = 0;
2785
2786 assert(s);
2787 assert(s->pending || s->type == SOURCE_EXIT);
2788
2789 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2790 * the event. */
2791 saved_type = s->type;
2792
2793 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2794 r = source_set_pending(s, false);
2795 if (r < 0)
2796 return r;
2797 }
2798
2799 if (s->type != SOURCE_POST) {
2800 sd_event_source *z;
2801 Iterator i;
2802
2803 /* If we execute a non-post source, let's mark all
2804 * post sources as pending */
2805
2806 SET_FOREACH(z, s->event->post_sources, i) {
2807 if (z->enabled == SD_EVENT_OFF)
2808 continue;
2809
2810 r = source_set_pending(z, true);
2811 if (r < 0)
2812 return r;
2813 }
2814 }
2815
2816 if (s->enabled == SD_EVENT_ONESHOT) {
2817 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2818 if (r < 0)
2819 return r;
2820 }
2821
2822 s->dispatching = true;
2823
2824 switch (s->type) {
2825
2826 case SOURCE_IO:
2827 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2828 break;
2829
2830 case SOURCE_TIME_REALTIME:
2831 case SOURCE_TIME_BOOTTIME:
2832 case SOURCE_TIME_MONOTONIC:
2833 case SOURCE_TIME_REALTIME_ALARM:
2834 case SOURCE_TIME_BOOTTIME_ALARM:
2835 r = s->time.callback(s, s->time.next, s->userdata);
2836 break;
2837
2838 case SOURCE_SIGNAL:
2839 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2840 break;
2841
2842 case SOURCE_CHILD: {
2843 bool zombie;
2844
2845 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2846
2847 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2848
2849 /* Now, reap the PID for good. */
2850 if (zombie)
2851 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2852
2853 break;
2854 }
2855
2856 case SOURCE_DEFER:
2857 r = s->defer.callback(s, s->userdata);
2858 break;
2859
2860 case SOURCE_POST:
2861 r = s->post.callback(s, s->userdata);
2862 break;
2863
2864 case SOURCE_EXIT:
2865 r = s->exit.callback(s, s->userdata);
2866 break;
2867
2868 case SOURCE_INOTIFY: {
2869 struct sd_event *e = s->event;
2870 struct inotify_data *d;
2871 size_t sz;
2872
2873 assert(s->inotify.inode_data);
2874 assert_se(d = s->inotify.inode_data->inotify_data);
2875
2876 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
2877 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2878 assert(d->buffer_filled >= sz);
2879
2880 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
2881
2882 /* When no event is pending anymore on this inotify object, let's drop the event from the
2883 * buffer. */
2884 if (d->n_pending == 0)
2885 event_inotify_data_drop(e, d, sz);
2886
2887 break;
2888 }
2889
2890 case SOURCE_WATCHDOG:
2891 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2892 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2893 assert_not_reached("Wut? I shouldn't exist.");
2894 }
2895
2896 s->dispatching = false;
2897
2898 if (r < 0)
2899 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2900 strna(s->description), event_source_type_to_string(saved_type));
2901
2902 if (s->n_ref == 0)
2903 source_free(s);
2904 else if (r < 0)
2905 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2906
2907 return 1;
2908 }
2909
2910 static int event_prepare(sd_event *e) {
2911 int r;
2912
2913 assert(e);
2914
2915 for (;;) {
2916 sd_event_source *s;
2917
2918 s = prioq_peek(e->prepare);
2919 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2920 break;
2921
2922 s->prepare_iteration = e->iteration;
2923 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2924 if (r < 0)
2925 return r;
2926
2927 assert(s->prepare);
2928
2929 s->dispatching = true;
2930 r = s->prepare(s, s->userdata);
2931 s->dispatching = false;
2932
2933 if (r < 0)
2934 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2935 strna(s->description), event_source_type_to_string(s->type));
2936
2937 if (s->n_ref == 0)
2938 source_free(s);
2939 else if (r < 0)
2940 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2941 }
2942
2943 return 0;
2944 }
2945
2946 static int dispatch_exit(sd_event *e) {
2947 sd_event_source *p;
2948 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2949 int r;
2950
2951 assert(e);
2952
2953 p = prioq_peek(e->exit);
2954 if (!p || p->enabled == SD_EVENT_OFF) {
2955 e->state = SD_EVENT_FINISHED;
2956 return 0;
2957 }
2958
2959 ref = sd_event_ref(e);
2960 e->iteration++;
2961 e->state = SD_EVENT_EXITING;
2962 r = source_dispatch(p);
2963 e->state = SD_EVENT_INITIAL;
2964 return r;
2965 }
2966
2967 static sd_event_source* event_next_pending(sd_event *e) {
2968 sd_event_source *p;
2969
2970 assert(e);
2971
2972 p = prioq_peek(e->pending);
2973 if (!p)
2974 return NULL;
2975
2976 if (p->enabled == SD_EVENT_OFF)
2977 return NULL;
2978
2979 return p;
2980 }
2981
2982 static int arm_watchdog(sd_event *e) {
2983 struct itimerspec its = {};
2984 usec_t t;
2985 int r;
2986
2987 assert(e);
2988 assert(e->watchdog_fd >= 0);
2989
2990 t = sleep_between(e,
2991 e->watchdog_last + (e->watchdog_period / 2),
2992 e->watchdog_last + (e->watchdog_period * 3 / 4));
2993
2994 timespec_store(&its.it_value, t);
2995
2996 /* Make sure we never set the watchdog to 0, which tells the
2997 * kernel to disable it. */
2998 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2999 its.it_value.tv_nsec = 1;
3000
3001 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3002 if (r < 0)
3003 return -errno;
3004
3005 return 0;
3006 }
3007
3008 static int process_watchdog(sd_event *e) {
3009 assert(e);
3010
3011 if (!e->watchdog)
3012 return 0;
3013
3014 /* Don't notify watchdog too often */
3015 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3016 return 0;
3017
3018 sd_notify(false, "WATCHDOG=1");
3019 e->watchdog_last = e->timestamp.monotonic;
3020
3021 return arm_watchdog(e);
3022 }
3023
3024 static void event_close_inode_data_fds(sd_event *e) {
3025 struct inode_data *d;
3026
3027 assert(e);
3028
3029 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3030 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3031 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3032 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3033 * compromise. */
3034
3035 while ((d = e->inode_data_to_close)) {
3036 assert(d->fd >= 0);
3037 d->fd = safe_close(d->fd);
3038
3039 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3040 }
3041 }
3042
3043 _public_ int sd_event_prepare(sd_event *e) {
3044 int r;
3045
3046 assert_return(e, -EINVAL);
3047 assert_return(e = event_resolve(e), -ENOPKG);
3048 assert_return(!event_pid_changed(e), -ECHILD);
3049 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3050 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3051
3052 if (e->exit_requested)
3053 goto pending;
3054
3055 e->iteration++;
3056
3057 e->state = SD_EVENT_PREPARING;
3058 r = event_prepare(e);
3059 e->state = SD_EVENT_INITIAL;
3060 if (r < 0)
3061 return r;
3062
3063 r = event_arm_timer(e, &e->realtime);
3064 if (r < 0)
3065 return r;
3066
3067 r = event_arm_timer(e, &e->boottime);
3068 if (r < 0)
3069 return r;
3070
3071 r = event_arm_timer(e, &e->monotonic);
3072 if (r < 0)
3073 return r;
3074
3075 r = event_arm_timer(e, &e->realtime_alarm);
3076 if (r < 0)
3077 return r;
3078
3079 r = event_arm_timer(e, &e->boottime_alarm);
3080 if (r < 0)
3081 return r;
3082
3083 event_close_inode_data_fds(e);
3084
3085 if (event_next_pending(e) || e->need_process_child)
3086 goto pending;
3087
3088 e->state = SD_EVENT_ARMED;
3089
3090 return 0;
3091
3092 pending:
3093 e->state = SD_EVENT_ARMED;
3094 r = sd_event_wait(e, 0);
3095 if (r == 0)
3096 e->state = SD_EVENT_ARMED;
3097
3098 return r;
3099 }
3100
3101 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3102 struct epoll_event *ev_queue;
3103 unsigned ev_queue_max;
3104 int r, m, i;
3105
3106 assert_return(e, -EINVAL);
3107 assert_return(e = event_resolve(e), -ENOPKG);
3108 assert_return(!event_pid_changed(e), -ECHILD);
3109 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3110 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3111
3112 if (e->exit_requested) {
3113 e->state = SD_EVENT_PENDING;
3114 return 1;
3115 }
3116
3117 ev_queue_max = MAX(e->n_sources, 1u);
3118 ev_queue = newa(struct epoll_event, ev_queue_max);
3119
3120 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3121 if (e->inotify_data_buffered)
3122 timeout = 0;
3123
3124 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3125 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3126 if (m < 0) {
3127 if (errno == EINTR) {
3128 e->state = SD_EVENT_PENDING;
3129 return 1;
3130 }
3131
3132 r = -errno;
3133 goto finish;
3134 }
3135
3136 triple_timestamp_get(&e->timestamp);
3137
3138 for (i = 0; i < m; i++) {
3139
3140 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3141 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3142 else {
3143 WakeupType *t = ev_queue[i].data.ptr;
3144
3145 switch (*t) {
3146
3147 case WAKEUP_EVENT_SOURCE:
3148 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3149 break;
3150
3151 case WAKEUP_CLOCK_DATA: {
3152 struct clock_data *d = ev_queue[i].data.ptr;
3153 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3154 break;
3155 }
3156
3157 case WAKEUP_SIGNAL_DATA:
3158 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3159 break;
3160
3161 case WAKEUP_INOTIFY_DATA:
3162 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3163 break;
3164
3165 default:
3166 assert_not_reached("Invalid wake-up pointer");
3167 }
3168 }
3169 if (r < 0)
3170 goto finish;
3171 }
3172
3173 r = process_watchdog(e);
3174 if (r < 0)
3175 goto finish;
3176
3177 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3178 if (r < 0)
3179 goto finish;
3180
3181 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3182 if (r < 0)
3183 goto finish;
3184
3185 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3186 if (r < 0)
3187 goto finish;
3188
3189 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3190 if (r < 0)
3191 goto finish;
3192
3193 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3194 if (r < 0)
3195 goto finish;
3196
3197 if (e->need_process_child) {
3198 r = process_child(e);
3199 if (r < 0)
3200 goto finish;
3201 }
3202
3203 r = process_inotify(e);
3204 if (r < 0)
3205 goto finish;
3206
3207 if (event_next_pending(e)) {
3208 e->state = SD_EVENT_PENDING;
3209
3210 return 1;
3211 }
3212
3213 r = 0;
3214
3215 finish:
3216 e->state = SD_EVENT_INITIAL;
3217
3218 return r;
3219 }
3220
3221 _public_ int sd_event_dispatch(sd_event *e) {
3222 sd_event_source *p;
3223 int r;
3224
3225 assert_return(e, -EINVAL);
3226 assert_return(e = event_resolve(e), -ENOPKG);
3227 assert_return(!event_pid_changed(e), -ECHILD);
3228 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3229 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3230
3231 if (e->exit_requested)
3232 return dispatch_exit(e);
3233
3234 p = event_next_pending(e);
3235 if (p) {
3236 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3237
3238 ref = sd_event_ref(e);
3239 e->state = SD_EVENT_RUNNING;
3240 r = source_dispatch(p);
3241 e->state = SD_EVENT_INITIAL;
3242 return r;
3243 }
3244
3245 e->state = SD_EVENT_INITIAL;
3246
3247 return 1;
3248 }
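/* Illustrative sketch (not part of the original source): sd_event_prepare(), sd_event_wait()
 * and sd_event_dispatch() exist so the loop can be driven step by step, e.g. when embedding it
 * into a foreign main loop via sd_event_get_fd(). One manual iteration, with "e" being the
 * event loop object, looks roughly like this (error handling elided):
 *
 *     r = sd_event_prepare(e);
 *     if (r == 0)                                   // nothing pending yet, so wait for events
 *             r = sd_event_wait(e, (uint64_t) -1);
 *     if (r > 0)                                    // something is pending, dispatch one source
 *             r = sd_event_dispatch(e);
 */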
3249
3250 static void event_log_delays(sd_event *e) {
3251 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3252 unsigned i;
3253 int o;
3254
3255 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3256 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3257 e->delays[i] = 0;
3258 }
3259 log_debug("Event loop iterations: %.*s", o, b);
3260 }
3261
3262 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3263 int r;
3264
3265 assert_return(e, -EINVAL);
3266 assert_return(e = event_resolve(e), -ENOPKG);
3267 assert_return(!event_pid_changed(e), -ECHILD);
3268 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3269 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3270
3271 if (e->profile_delays && e->last_run) {
3272 usec_t this_run;
3273 unsigned l;
3274
3275 this_run = now(CLOCK_MONOTONIC);
3276
3277 l = u64log2(this_run - e->last_run);
3278 assert(l < sizeof(e->delays));
3279 e->delays[l]++;
3280
3281 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3282 event_log_delays(e);
3283 e->last_log = this_run;
3284 }
3285 }
3286
3287 r = sd_event_prepare(e);
3288 if (r == 0)
3289 /* There was nothing? Then wait... */
3290 r = sd_event_wait(e, timeout);
3291
3292 if (e->profile_delays)
3293 e->last_run = now(CLOCK_MONOTONIC);
3294
3295 if (r > 0) {
3296 /* There's something now, so let's dispatch it */
3297 r = sd_event_dispatch(e);
3298 if (r < 0)
3299 return r;
3300
3301 return 1;
3302 }
3303
3304 return r;
3305 }
3306
3307 _public_ int sd_event_loop(sd_event *e) {
3308 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3309 int r;
3310
3311 assert_return(e, -EINVAL);
3312 assert_return(e = event_resolve(e), -ENOPKG);
3313 assert_return(!event_pid_changed(e), -ECHILD);
3314 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3315
3316 ref = sd_event_ref(e);
3317
3318 while (e->state != SD_EVENT_FINISHED) {
3319 r = sd_event_run(e, (uint64_t) -1);
3320 if (r < 0)
3321 return r;
3322 }
3323
3324 return e->exit_code;
3325 }
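/* Illustrative sketch (not part of the original source): typical high-level usage is to grab
 * the default loop, attach sources, and hand control to sd_event_loop() until some handler
 * calls sd_event_exit(). A hypothetical skeleton (error handling elided):
 *
 *     sd_event *e = NULL;
 *     r = sd_event_default(&e);
 *     // ... sd_event_add_io()/sd_event_add_time()/... calls go here ...
 *     r = sd_event_loop(e);        // returns the code previously passed to sd_event_exit()
 *     e = sd_event_unref(e);
 */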
3326
3327 _public_ int sd_event_get_fd(sd_event *e) {
3328
3329 assert_return(e, -EINVAL);
3330 assert_return(e = event_resolve(e), -ENOPKG);
3331 assert_return(!event_pid_changed(e), -ECHILD);
3332
3333 return e->epoll_fd;
3334 }
3335
3336 _public_ int sd_event_get_state(sd_event *e) {
3337 assert_return(e, -EINVAL);
3338 assert_return(e = event_resolve(e), -ENOPKG);
3339 assert_return(!event_pid_changed(e), -ECHILD);
3340
3341 return e->state;
3342 }
3343
3344 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3345 assert_return(e, -EINVAL);
3346 assert_return(e = event_resolve(e), -ENOPKG);
3347 assert_return(code, -EINVAL);
3348 assert_return(!event_pid_changed(e), -ECHILD);
3349
3350 if (!e->exit_requested)
3351 return -ENODATA;
3352
3353 *code = e->exit_code;
3354 return 0;
3355 }
3356
3357 _public_ int sd_event_exit(sd_event *e, int code) {
3358 assert_return(e, -EINVAL);
3359 assert_return(e = event_resolve(e), -ENOPKG);
3360 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3361 assert_return(!event_pid_changed(e), -ECHILD);
3362
3363 e->exit_requested = true;
3364 e->exit_code = code;
3365
3366 return 0;
3367 }
3368
3369 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3370 assert_return(e, -EINVAL);
3371 assert_return(e = event_resolve(e), -ENOPKG);
3372 assert_return(usec, -EINVAL);
3373 assert_return(!event_pid_changed(e), -ECHILD);
3374
3375 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3376 return -EOPNOTSUPP;
3377
3378 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3379 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3380 * the purpose of getting the time this doesn't matter. */
3381 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3382 return -EOPNOTSUPP;
3383
3384 if (!triple_timestamp_is_set(&e->timestamp)) {
3385 /* Implicitly fall back to now() if we never ran
3386 * before and thus have no cached time. */
3387 *usec = now(clock);
3388 return 1;
3389 }
3390
3391 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3392 return 0;
3393 }
3394
3395 _public_ int sd_event_default(sd_event **ret) {
3396 sd_event *e = NULL;
3397 int r;
3398
3399 if (!ret)
3400 return !!default_event;
3401
3402 if (default_event) {
3403 *ret = sd_event_ref(default_event);
3404 return 0;
3405 }
3406
3407 r = sd_event_new(&e);
3408 if (r < 0)
3409 return r;
3410
3411 e->default_event_ptr = &default_event;
3412 e->tid = gettid();
3413 default_event = e;
3414
3415 *ret = e;
3416 return 1;
3417 }
3418
3419 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3420 assert_return(e, -EINVAL);
3421 assert_return(e = event_resolve(e), -ENOPKG);
3422 assert_return(tid, -EINVAL);
3423 assert_return(!event_pid_changed(e), -ECHILD);
3424
3425 if (e->tid != 0) {
3426 *tid = e->tid;
3427 return 0;
3428 }
3429
3430 return -ENXIO;
3431 }
3432
3433 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3434 int r;
3435
3436 assert_return(e, -EINVAL);
3437 assert_return(e = event_resolve(e), -ENOPKG);
3438 assert_return(!event_pid_changed(e), -ECHILD);
3439
3440 if (e->watchdog == !!b)
3441 return e->watchdog;
3442
3443 if (b) {
3444 struct epoll_event ev;
3445
3446 r = sd_watchdog_enabled(false, &e->watchdog_period);
3447 if (r <= 0)
3448 return r;
3449
3450 /* Issue first ping immediately */
3451 sd_notify(false, "WATCHDOG=1");
3452 e->watchdog_last = now(CLOCK_MONOTONIC);
3453
3454 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3455 if (e->watchdog_fd < 0)
3456 return -errno;
3457
3458 r = arm_watchdog(e);
3459 if (r < 0)
3460 goto fail;
3461
3462 ev = (struct epoll_event) {
3463 .events = EPOLLIN,
3464 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3465 };
3466
3467 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3468 if (r < 0) {
3469 r = -errno;
3470 goto fail;
3471 }
3472
3473 } else {
3474 if (e->watchdog_fd >= 0) {
3475 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3476 e->watchdog_fd = safe_close(e->watchdog_fd);
3477 }
3478 }
3479
3480 e->watchdog = !!b;
3481 return e->watchdog;
3482
3483 fail:
3484 e->watchdog_fd = safe_close(e->watchdog_fd);
3485 return r;
3486 }
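/* Illustrative sketch (not part of the original source): a service with WatchdogSec= set in
 * its unit file can let the event loop send the keep-alive pings itself, since
 * sd_watchdog_enabled() picks the period up from the environment ($WATCHDOG_USEC):
 *
 *     r = sd_event_set_watchdog(e, true);
 *     // r > 0: pings are now sent automatically from within the loop
 *     // r == 0: no watchdog was requested for this service, nothing to do
 */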
3487
3488 _public_ int sd_event_get_watchdog(sd_event *e) {
3489 assert_return(e, -EINVAL);
3490 assert_return(e = event_resolve(e), -ENOPKG);
3491 assert_return(!event_pid_changed(e), -ECHILD);
3492
3493 return e->watchdog;
3494 }
3495
3496 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3497 assert_return(e, -EINVAL);
3498 assert_return(e = event_resolve(e), -ENOPKG);
3499 assert_return(!event_pid_changed(e), -ECHILD);
3500
3501 *ret = e->iteration;
3502 return 0;
3503 }
3504
3505 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3506 assert_return(s, -EINVAL);
3507
3508 s->destroy_callback = callback;
3509 return 0;
3510 }
3511
3512 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3513 assert_return(s, -EINVAL);
3514
3515 if (ret)
3516 *ret = s->destroy_callback;
3517
3518 return !!s->destroy_callback;
3519 }
3520
3521 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3522 assert_return(s, -EINVAL);
3523
3524 return s->floating;
3525 }
3526
3527 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3528 assert_return(s, -EINVAL);
3529
3530 if (s->floating == !!b)
3531 return 0;
3532
3533 if (!s->event) /* Already disconnected */
3534 return -ESTALE;
3535
3536 s->floating = b;
3537
3538 if (b) {
3539 sd_event_source_ref(s);
3540 sd_event_unref(s->event);
3541 } else {
3542 sd_event_ref(s->event);
3543 sd_event_source_unref(s);
3544 }
3545
3546 return 1;
3547 }