1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "event-source.h"
13 #include "fd-util.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "list.h"
17 #include "macro.h"
18 #include "memory-util.h"
19 #include "missing_syscall.h"
20 #include "prioq.h"
21 #include "process-util.h"
22 #include "set.h"
23 #include "signal-util.h"
24 #include "string-table.h"
25 #include "string-util.h"
26 #include "strxcpyx.h"
27 #include "time-util.h"
28
29 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
30
31 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
32 [SOURCE_IO] = "io",
33 [SOURCE_TIME_REALTIME] = "realtime",
34 [SOURCE_TIME_BOOTTIME] = "boottime",
35 [SOURCE_TIME_MONOTONIC] = "monotonic",
36 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
37 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
38 [SOURCE_SIGNAL] = "signal",
39 [SOURCE_CHILD] = "child",
40 [SOURCE_DEFER] = "defer",
41 [SOURCE_POST] = "post",
42 [SOURCE_EXIT] = "exit",
43 [SOURCE_WATCHDOG] = "watchdog",
44 [SOURCE_INOTIFY] = "inotify",
45 };
46
47 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
48
49 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
50
51 struct sd_event {
52 unsigned n_ref;
53
54 int epoll_fd;
55 int watchdog_fd;
56
57 Prioq *pending;
58 Prioq *prepare;
59
60 /* timerfd_create() only supports these five clocks so far. We
61 * can add support for more clocks when the kernel learns to
62 * deal with them, too. */
63 struct clock_data realtime;
64 struct clock_data boottime;
65 struct clock_data monotonic;
66 struct clock_data realtime_alarm;
67 struct clock_data boottime_alarm;
68
69 usec_t perturb;
70
71 sd_event_source **signal_sources; /* indexed by signal number */
72 Hashmap *signal_data; /* indexed by priority */
73
74 Hashmap *child_sources;
75 unsigned n_enabled_child_sources;
76
77 Set *post_sources;
78
79 Prioq *exit;
80
81 Hashmap *inotify_data; /* indexed by priority */
82
83 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
84 LIST_HEAD(struct inode_data, inode_data_to_close);
85
86 /* A list of inotify objects that already have events buffered which aren't processed yet */
87 LIST_HEAD(struct inotify_data, inotify_data_buffered);
88
89 pid_t original_pid;
90
91 uint64_t iteration;
92 triple_timestamp timestamp;
93 int state;
94
95 bool exit_requested:1;
96 bool need_process_child:1;
97 bool watchdog:1;
98 bool profile_delays:1;
99
100 int exit_code;
101
102 pid_t tid;
103 sd_event **default_event_ptr;
104
105 usec_t watchdog_last, watchdog_period;
106
107 unsigned n_sources;
108
109 LIST_HEAD(sd_event_source, sources);
110
111 usec_t last_run, last_log;
112 unsigned delays[sizeof(usec_t) * 8];
113 };
114
115 static thread_local sd_event *default_event = NULL;
116
117 static void source_disconnect(sd_event_source *s);
118 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
119
120 static sd_event *event_resolve(sd_event *e) {
121 return e == SD_EVENT_DEFAULT ? default_event : e;
122 }
123
124 static int pending_prioq_compare(const void *a, const void *b) {
125 const sd_event_source *x = a, *y = b;
126 int r;
127
128 assert(x->pending);
129 assert(y->pending);
130
131 /* Enabled ones first */
132 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
133 return -1;
134 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
135 return 1;
136
137 /* Lower priority values first */
138 r = CMP(x->priority, y->priority);
139 if (r != 0)
140 return r;
141
142 /* Older entries first */
143 return CMP(x->pending_iteration, y->pending_iteration);
144 }
145
146 static int prepare_prioq_compare(const void *a, const void *b) {
147 const sd_event_source *x = a, *y = b;
148 int r;
149
150 assert(x->prepare);
151 assert(y->prepare);
152
153 /* Enabled ones first */
154 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
155 return -1;
156 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
157 return 1;
158
159 /* Move most recently prepared ones last, so that we can stop
160 * preparing as soon as we hit one that has already been
161 * prepared in the current iteration */
162 r = CMP(x->prepare_iteration, y->prepare_iteration);
163 if (r != 0)
164 return r;
165
166 /* Lower priority values first */
167 return CMP(x->priority, y->priority);
168 }
169
170 static int earliest_time_prioq_compare(const void *a, const void *b) {
171 const sd_event_source *x = a, *y = b;
172
173 assert(EVENT_SOURCE_IS_TIME(x->type));
174 assert(x->type == y->type);
175
176 /* Enabled ones first */
177 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
178 return -1;
179 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
180 return 1;
181
182 /* Move the pending ones to the end */
183 if (!x->pending && y->pending)
184 return -1;
185 if (x->pending && !y->pending)
186 return 1;
187
188 /* Order by time */
189 return CMP(x->time.next, y->time.next);
190 }
191
192 static usec_t time_event_source_latest(const sd_event_source *s) {
193 return usec_add(s->time.next, s->time.accuracy);
194 }
195
196 static int latest_time_prioq_compare(const void *a, const void *b) {
197 const sd_event_source *x = a, *y = b;
198
199 assert(EVENT_SOURCE_IS_TIME(x->type));
200 assert(x->type == y->type);
201
202 /* Enabled ones first */
203 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204 return -1;
205 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206 return 1;
207
208 /* Move the pending ones to the end */
209 if (!x->pending && y->pending)
210 return -1;
211 if (x->pending && !y->pending)
212 return 1;
213
214 /* Order by time */
215 return CMP(time_event_source_latest(x), time_event_source_latest(y));
216 }
217
218 static int exit_prioq_compare(const void *a, const void *b) {
219 const sd_event_source *x = a, *y = b;
220
221 assert(x->type == SOURCE_EXIT);
222 assert(y->type == SOURCE_EXIT);
223
224 /* Enabled ones first */
225 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
226 return -1;
227 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
228 return 1;
229
230 /* Lower priority values first */
231 return CMP(x->priority, y->priority);
232 }
233
234 static void free_clock_data(struct clock_data *d) {
235 assert(d);
236 assert(d->wakeup == WAKEUP_CLOCK_DATA);
237
238 safe_close(d->fd);
239 prioq_free(d->earliest);
240 prioq_free(d->latest);
241 }
242
243 static sd_event *event_free(sd_event *e) {
244 sd_event_source *s;
245
246 assert(e);
247
248 while ((s = e->sources)) {
249 assert(s->floating);
250 source_disconnect(s);
251 sd_event_source_unref(s);
252 }
253
254 assert(e->n_sources == 0);
255
256 if (e->default_event_ptr)
257 *(e->default_event_ptr) = NULL;
258
259 safe_close(e->epoll_fd);
260 safe_close(e->watchdog_fd);
261
262 free_clock_data(&e->realtime);
263 free_clock_data(&e->boottime);
264 free_clock_data(&e->monotonic);
265 free_clock_data(&e->realtime_alarm);
266 free_clock_data(&e->boottime_alarm);
267
268 prioq_free(e->pending);
269 prioq_free(e->prepare);
270 prioq_free(e->exit);
271
272 free(e->signal_sources);
273 hashmap_free(e->signal_data);
274
275 hashmap_free(e->inotify_data);
276
277 hashmap_free(e->child_sources);
278 set_free(e->post_sources);
279
280 return mfree(e);
281 }
282
283 _public_ int sd_event_new(sd_event** ret) {
284 sd_event *e;
285 int r;
286
287 assert_return(ret, -EINVAL);
288
289 e = new(sd_event, 1);
290 if (!e)
291 return -ENOMEM;
292
293 *e = (sd_event) {
294 .n_ref = 1,
295 .epoll_fd = -1,
296 .watchdog_fd = -1,
297 .realtime.wakeup = WAKEUP_CLOCK_DATA,
298 .realtime.fd = -1,
299 .realtime.next = USEC_INFINITY,
300 .boottime.wakeup = WAKEUP_CLOCK_DATA,
301 .boottime.fd = -1,
302 .boottime.next = USEC_INFINITY,
303 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
304 .monotonic.fd = -1,
305 .monotonic.next = USEC_INFINITY,
306 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
307 .realtime_alarm.fd = -1,
308 .realtime_alarm.next = USEC_INFINITY,
309 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
310 .boottime_alarm.fd = -1,
311 .boottime_alarm.next = USEC_INFINITY,
312 .perturb = USEC_INFINITY,
313 .original_pid = getpid_cached(),
314 };
315
316 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
317 if (r < 0)
318 goto fail;
319
320 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
321 if (e->epoll_fd < 0) {
322 r = -errno;
323 goto fail;
324 }
325
326 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
327
328 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
329 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
330 e->profile_delays = true;
331 }
332
333 *ret = e;
334 return 0;
335
336 fail:
337 event_free(e);
338 return r;
339 }
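/* Usage sketch (illustrative only, not part of this file): creating and releasing an event loop with
 * the allocator above. Error handling is abbreviated; sd_event_loop() and sd_event_unref() are assumed
 * from the public sd-event API.
 *
 *     #include <systemd/sd-event.h>
 *
 *     int run_loop(void) {
 *             sd_event *e = NULL;
 *             int r;
 *
 *             r = sd_event_new(&e);            // allocates the loop (epoll fd, clock data, ...)
 *             if (r < 0)
 *                     return r;
 *
 *             r = sd_event_loop(e);            // runs until sd_event_exit() is called
 *             sd_event_unref(e);               // drops the reference taken by sd_event_new()
 *             return r;
 *     }
 */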
340
341 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
342
343 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
344 if (s)
345 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
346 return sd_event_source_unref(s);
347 }
348
349 static bool event_pid_changed(sd_event *e) {
350 assert(e);
351
352 /* We don't support people creating an event loop and keeping
353 * it around over a fork(). Let's complain. */
354
355 return e->original_pid != getpid_cached();
356 }
357
358 static void source_io_unregister(sd_event_source *s) {
359 int r;
360
361 assert(s);
362 assert(s->type == SOURCE_IO);
363
364 if (event_pid_changed(s->event))
365 return;
366
367 if (!s->io.registered)
368 return;
369
370 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
371 if (r < 0)
372 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
373 strna(s->description), event_source_type_to_string(s->type));
374
375 s->io.registered = false;
376 }
377
378 static int source_io_register(
379 sd_event_source *s,
380 int enabled,
381 uint32_t events) {
382
383 struct epoll_event ev;
384 int r;
385
386 assert(s);
387 assert(s->type == SOURCE_IO);
388 assert(enabled != SD_EVENT_OFF);
389
390 ev = (struct epoll_event) {
391 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
392 .data.ptr = s,
393 };
394
395 if (s->io.registered)
396 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
397 else
398 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
399 if (r < 0)
400 return -errno;
401
402 s->io.registered = true;
403
404 return 0;
405 }
406
407 static clockid_t event_source_type_to_clock(EventSourceType t) {
408
409 switch (t) {
410
411 case SOURCE_TIME_REALTIME:
412 return CLOCK_REALTIME;
413
414 case SOURCE_TIME_BOOTTIME:
415 return CLOCK_BOOTTIME;
416
417 case SOURCE_TIME_MONOTONIC:
418 return CLOCK_MONOTONIC;
419
420 case SOURCE_TIME_REALTIME_ALARM:
421 return CLOCK_REALTIME_ALARM;
422
423 case SOURCE_TIME_BOOTTIME_ALARM:
424 return CLOCK_BOOTTIME_ALARM;
425
426 default:
427 return (clockid_t) -1;
428 }
429 }
430
431 static EventSourceType clock_to_event_source_type(clockid_t clock) {
432
433 switch (clock) {
434
435 case CLOCK_REALTIME:
436 return SOURCE_TIME_REALTIME;
437
438 case CLOCK_BOOTTIME:
439 return SOURCE_TIME_BOOTTIME;
440
441 case CLOCK_MONOTONIC:
442 return SOURCE_TIME_MONOTONIC;
443
444 case CLOCK_REALTIME_ALARM:
445 return SOURCE_TIME_REALTIME_ALARM;
446
447 case CLOCK_BOOTTIME_ALARM:
448 return SOURCE_TIME_BOOTTIME_ALARM;
449
450 default:
451 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
452 }
453 }
454
455 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
456 assert(e);
457
458 switch (t) {
459
460 case SOURCE_TIME_REALTIME:
461 return &e->realtime;
462
463 case SOURCE_TIME_BOOTTIME:
464 return &e->boottime;
465
466 case SOURCE_TIME_MONOTONIC:
467 return &e->monotonic;
468
469 case SOURCE_TIME_REALTIME_ALARM:
470 return &e->realtime_alarm;
471
472 case SOURCE_TIME_BOOTTIME_ALARM:
473 return &e->boottime_alarm;
474
475 default:
476 return NULL;
477 }
478 }
479
480 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
481 assert(e);
482
483 if (!d)
484 return;
485
486 hashmap_remove(e->signal_data, &d->priority);
487 safe_close(d->fd);
488 free(d);
489 }
490
491 static int event_make_signal_data(
492 sd_event *e,
493 int sig,
494 struct signal_data **ret) {
495
496 struct epoll_event ev;
497 struct signal_data *d;
498 bool added = false;
499 sigset_t ss_copy;
500 int64_t priority;
501 int r;
502
503 assert(e);
504
505 if (event_pid_changed(e))
506 return -ECHILD;
507
508 if (e->signal_sources && e->signal_sources[sig])
509 priority = e->signal_sources[sig]->priority;
510 else
511 priority = SD_EVENT_PRIORITY_NORMAL;
512
513 d = hashmap_get(e->signal_data, &priority);
514 if (d) {
515 if (sigismember(&d->sigset, sig) > 0) {
516 if (ret)
517 *ret = d;
518 return 0;
519 }
520 } else {
521 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
522 if (r < 0)
523 return r;
524
525 d = new(struct signal_data, 1);
526 if (!d)
527 return -ENOMEM;
528
529 *d = (struct signal_data) {
530 .wakeup = WAKEUP_SIGNAL_DATA,
531 .fd = -1,
532 .priority = priority,
533 };
534
535 r = hashmap_put(e->signal_data, &d->priority, d);
536 if (r < 0) {
537 free(d);
538 return r;
539 }
540
541 added = true;
542 }
543
544 ss_copy = d->sigset;
545 assert_se(sigaddset(&ss_copy, sig) >= 0);
546
547 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
548 if (r < 0) {
549 r = -errno;
550 goto fail;
551 }
552
553 d->sigset = ss_copy;
554
555 if (d->fd >= 0) {
556 if (ret)
557 *ret = d;
558 return 0;
559 }
560
561 d->fd = fd_move_above_stdio(r);
562
563 ev = (struct epoll_event) {
564 .events = EPOLLIN,
565 .data.ptr = d,
566 };
567
568 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
569 if (r < 0) {
570 r = -errno;
571 goto fail;
572 }
573
574 if (ret)
575 *ret = d;
576
577 return 0;
578
579 fail:
580 if (added)
581 event_free_signal_data(e, d);
582
583 return r;
584 }
585
586 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
587 assert(e);
588 assert(d);
589
590 /* Turns off the specified signal in the signal data
591 * object. If the signal mask of the object becomes empty
592 * that way, the object is removed. */
593
594 if (sigismember(&d->sigset, sig) == 0)
595 return;
596
597 assert_se(sigdelset(&d->sigset, sig) >= 0);
598
599 if (sigisemptyset(&d->sigset)) {
600 /* If the mask is now all-zero we can get rid of the structure */
601 event_free_signal_data(e, d);
602 return;
603 }
604
605 assert(d->fd >= 0);
606
607 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
608 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
609 }
610
611 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
612 struct signal_data *d;
613 static const int64_t zero_priority = 0;
614
615 assert(e);
616
617 /* Rechecks if the specified signal is still something we are
618 * interested in. If not, we'll unmask it, and possibly drop
619 * the signalfd for it. */
620
621 if (sig == SIGCHLD &&
622 e->n_enabled_child_sources > 0)
623 return;
624
625 if (e->signal_sources &&
626 e->signal_sources[sig] &&
627 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
628 return;
629
630 /*
631 * The specified signal might be enabled in three different queues:
632 *
633 * 1) the one that belongs to the priority passed (if it is non-NULL)
634 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
635 * 3) the 0 priority (to cover the SIGCHLD case)
636 *
637 * Hence, let's remove it from all three here.
638 */
639
640 if (priority) {
641 d = hashmap_get(e->signal_data, priority);
642 if (d)
643 event_unmask_signal_data(e, d, sig);
644 }
645
646 if (e->signal_sources && e->signal_sources[sig]) {
647 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
648 if (d)
649 event_unmask_signal_data(e, d, sig);
650 }
651
652 d = hashmap_get(e->signal_data, &zero_priority);
653 if (d)
654 event_unmask_signal_data(e, d, sig);
655 }
656
657 static void source_disconnect(sd_event_source *s) {
658 sd_event *event;
659
660 assert(s);
661
662 if (!s->event)
663 return;
664
665 assert(s->event->n_sources > 0);
666
667 switch (s->type) {
668
669 case SOURCE_IO:
670 if (s->io.fd >= 0)
671 source_io_unregister(s);
672
673 break;
674
675 case SOURCE_TIME_REALTIME:
676 case SOURCE_TIME_BOOTTIME:
677 case SOURCE_TIME_MONOTONIC:
678 case SOURCE_TIME_REALTIME_ALARM:
679 case SOURCE_TIME_BOOTTIME_ALARM: {
680 struct clock_data *d;
681
682 d = event_get_clock_data(s->event, s->type);
683 assert(d);
684
685 prioq_remove(d->earliest, s, &s->time.earliest_index);
686 prioq_remove(d->latest, s, &s->time.latest_index);
687 d->needs_rearm = true;
688 break;
689 }
690
691 case SOURCE_SIGNAL:
692 if (s->signal.sig > 0) {
693
694 if (s->event->signal_sources)
695 s->event->signal_sources[s->signal.sig] = NULL;
696
697 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
698 }
699
700 break;
701
702 case SOURCE_CHILD:
703 if (s->child.pid > 0) {
704 if (s->enabled != SD_EVENT_OFF) {
705 assert(s->event->n_enabled_child_sources > 0);
706 s->event->n_enabled_child_sources--;
707 }
708
709 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
710 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
711 }
712
713 break;
714
715 case SOURCE_DEFER:
716 /* nothing */
717 break;
718
719 case SOURCE_POST:
720 set_remove(s->event->post_sources, s);
721 break;
722
723 case SOURCE_EXIT:
724 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
725 break;
726
727 case SOURCE_INOTIFY: {
728 struct inode_data *inode_data;
729
730 inode_data = s->inotify.inode_data;
731 if (inode_data) {
732 struct inotify_data *inotify_data;
733 assert_se(inotify_data = inode_data->inotify_data);
734
735 /* Detach this event source from the inode object */
736 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
737 s->inotify.inode_data = NULL;
738
739 if (s->pending) {
740 assert(inotify_data->n_pending > 0);
741 inotify_data->n_pending--;
742 }
743
744 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode continues
745 * to be watched. That's because inotify doesn't really have an API for that: we
746 * can only change watch masks with access to the original inode either by fd or by path. But
747 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
748 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
749 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
750 * there), but given the need for open_by_handle_at() which is privileged and not universally
751 * available this would be quite an incomplete solution. Hence we go the other way, leave the
752 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
753 * anymore after reception. Yes, this sucks, but … Linux … */
754
755 /* Maybe release the inode data (and its inotify) */
756 event_gc_inode_data(s->event, inode_data);
757 }
758
759 break;
760 }
761
762 default:
763 assert_not_reached("Wut? I shouldn't exist.");
764 }
765
766 if (s->pending)
767 prioq_remove(s->event->pending, s, &s->pending_index);
768
769 if (s->prepare)
770 prioq_remove(s->event->prepare, s, &s->prepare_index);
771
772 event = s->event;
773
774 s->event = NULL;
775 LIST_REMOVE(sources, event->sources, s);
776 event->n_sources--;
777
778 /* Note that we don't invalidate the type here, since we still need it in order to close the fd or
779 * pidfd associated with this event source, which we'll do only on source_free(). */
780
781 if (!s->floating)
782 sd_event_unref(event);
783 }
784
785 static void source_free(sd_event_source *s) {
786 assert(s);
787
788 source_disconnect(s);
789
790 if (s->type == SOURCE_IO && s->io.owned)
791 s->io.fd = safe_close(s->io.fd);
792
793 if (s->destroy_callback)
794 s->destroy_callback(s->userdata);
795
796 free(s->description);
797 free(s);
798 }
799 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
800
801 static int source_set_pending(sd_event_source *s, bool b) {
802 int r;
803
804 assert(s);
805 assert(s->type != SOURCE_EXIT);
806
807 if (s->pending == b)
808 return 0;
809
810 s->pending = b;
811
812 if (b) {
813 s->pending_iteration = s->event->iteration;
814
815 r = prioq_put(s->event->pending, s, &s->pending_index);
816 if (r < 0) {
817 s->pending = false;
818 return r;
819 }
820 } else
821 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
822
823 if (EVENT_SOURCE_IS_TIME(s->type)) {
824 struct clock_data *d;
825
826 d = event_get_clock_data(s->event, s->type);
827 assert(d);
828
829 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
830 prioq_reshuffle(d->latest, s, &s->time.latest_index);
831 d->needs_rearm = true;
832 }
833
834 if (s->type == SOURCE_SIGNAL && !b) {
835 struct signal_data *d;
836
837 d = hashmap_get(s->event->signal_data, &s->priority);
838 if (d && d->current == s)
839 d->current = NULL;
840 }
841
842 if (s->type == SOURCE_INOTIFY) {
843
844 assert(s->inotify.inode_data);
845 assert(s->inotify.inode_data->inotify_data);
846
847 if (b)
848 s->inotify.inode_data->inotify_data->n_pending++;
849 else {
850 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
851 s->inotify.inode_data->inotify_data->n_pending--;
852 }
853 }
854
855 return 0;
856 }
857
858 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
859 sd_event_source *s;
860
861 assert(e);
862
863 s = new(sd_event_source, 1);
864 if (!s)
865 return NULL;
866
867 *s = (struct sd_event_source) {
868 .n_ref = 1,
869 .event = e,
870 .floating = floating,
871 .type = type,
872 .pending_index = PRIOQ_IDX_NULL,
873 .prepare_index = PRIOQ_IDX_NULL,
874 };
875
876 if (!floating)
877 sd_event_ref(e);
878
879 LIST_PREPEND(sources, e->sources, s);
880 e->n_sources++;
881
882 return s;
883 }
884
885 _public_ int sd_event_add_io(
886 sd_event *e,
887 sd_event_source **ret,
888 int fd,
889 uint32_t events,
890 sd_event_io_handler_t callback,
891 void *userdata) {
892
893 _cleanup_(source_freep) sd_event_source *s = NULL;
894 int r;
895
896 assert_return(e, -EINVAL);
897 assert_return(e = event_resolve(e), -ENOPKG);
898 assert_return(fd >= 0, -EBADF);
899 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
900 assert_return(callback, -EINVAL);
901 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
902 assert_return(!event_pid_changed(e), -ECHILD);
903
904 s = source_new(e, !ret, SOURCE_IO);
905 if (!s)
906 return -ENOMEM;
907
908 s->wakeup = WAKEUP_EVENT_SOURCE;
909 s->io.fd = fd;
910 s->io.events = events;
911 s->io.callback = callback;
912 s->userdata = userdata;
913 s->enabled = SD_EVENT_ON;
914
915 r = source_io_register(s, s->enabled, events);
916 if (r < 0)
917 return r;
918
919 if (ret)
920 *ret = s;
921 TAKE_PTR(s);
922
923 return 0;
924 }
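/* Usage sketch (illustrative only, not part of this file): watching a socket fd for input with
 * sd_event_add_io(). The handler follows the sd_event_io_handler_t signature; "my_socket_fd" is a
 * hypothetical descriptor owned by the caller.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             if (revents & EPOLLIN) {
 *                     // read from fd here
 *             }
 *             return 0;
 *     }
 *
 *     sd_event_source *io_source = NULL;
 *     r = sd_event_add_io(e, &io_source, my_socket_fd, EPOLLIN, on_io, NULL);
 *     // r < 0 on failure; the source starts out enabled (SD_EVENT_ON), as set above.
 */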
925
926 static void initialize_perturb(sd_event *e) {
927 sd_id128_t bootid = {};
928
929 /* When we sleep for longer, we try to realign the wakeup to
930 the same time within each minute/second/250ms, so that
931 events all across the system can be coalesced into a single
932 CPU wakeup. However, let's take some system-specific
933 randomness for this value, so that in a network of systems
934 with synced clocks timer events are distributed a
935 bit. Here, we calculate a perturbation usec offset from the
936 boot ID. */
937
938 if (_likely_(e->perturb != USEC_INFINITY))
939 return;
940
941 if (sd_id128_get_boot(&bootid) >= 0)
942 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
943 }
944
945 static int event_setup_timer_fd(
946 sd_event *e,
947 struct clock_data *d,
948 clockid_t clock) {
949
950 struct epoll_event ev;
951 int r, fd;
952
953 assert(e);
954 assert(d);
955
956 if (_likely_(d->fd >= 0))
957 return 0;
958
959 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
960 if (fd < 0)
961 return -errno;
962
963 fd = fd_move_above_stdio(fd);
964
965 ev = (struct epoll_event) {
966 .events = EPOLLIN,
967 .data.ptr = d,
968 };
969
970 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
971 if (r < 0) {
972 safe_close(fd);
973 return -errno;
974 }
975
976 d->fd = fd;
977 return 0;
978 }
979
980 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
981 assert(s);
982
983 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
984 }
985
986 _public_ int sd_event_add_time(
987 sd_event *e,
988 sd_event_source **ret,
989 clockid_t clock,
990 uint64_t usec,
991 uint64_t accuracy,
992 sd_event_time_handler_t callback,
993 void *userdata) {
994
995 EventSourceType type;
996 _cleanup_(source_freep) sd_event_source *s = NULL;
997 struct clock_data *d;
998 int r;
999
1000 assert_return(e, -EINVAL);
1001 assert_return(e = event_resolve(e), -ENOPKG);
1002 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1003 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1004 assert_return(!event_pid_changed(e), -ECHILD);
1005
1006 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1007 return -EOPNOTSUPP;
1008
1009 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1010 if (type < 0)
1011 return -EOPNOTSUPP;
1012
1013 if (!callback)
1014 callback = time_exit_callback;
1015
1016 d = event_get_clock_data(e, type);
1017 assert(d);
1018
1019 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1020 if (r < 0)
1021 return r;
1022
1023 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1024 if (r < 0)
1025 return r;
1026
1027 if (d->fd < 0) {
1028 r = event_setup_timer_fd(e, d, clock);
1029 if (r < 0)
1030 return r;
1031 }
1032
1033 s = source_new(e, !ret, type);
1034 if (!s)
1035 return -ENOMEM;
1036
1037 s->time.next = usec;
1038 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1039 s->time.callback = callback;
1040 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1041 s->userdata = userdata;
1042 s->enabled = SD_EVENT_ONESHOT;
1043
1044 d->needs_rearm = true;
1045
1046 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1047 if (r < 0)
1048 return r;
1049
1050 r = prioq_put(d->latest, s, &s->time.latest_index);
1051 if (r < 0)
1052 return r;
1053
1054 if (ret)
1055 *ret = s;
1056 TAKE_PTR(s);
1057
1058 return 0;
1059 }
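/* Usage sketch (illustrative only, not part of this file): a one-shot timer five seconds from now on
 * CLOCK_MONOTONIC. sd_event_now() is assumed from the public sd-event API; passing accuracy == 0
 * selects DEFAULT_ACCURACY_USEC, as implemented above.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             // fires once, since the source is created as SD_EVENT_ONESHOT
 *             return 0;
 *     }
 *
 *     uint64_t now_usec;
 *     r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     if (r >= 0)
 *             r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                                   now_usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 */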
1060
1061 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1062 assert(s);
1063
1064 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1065 }
1066
1067 _public_ int sd_event_add_signal(
1068 sd_event *e,
1069 sd_event_source **ret,
1070 int sig,
1071 sd_event_signal_handler_t callback,
1072 void *userdata) {
1073
1074 _cleanup_(source_freep) sd_event_source *s = NULL;
1075 struct signal_data *d;
1076 sigset_t ss;
1077 int r;
1078
1079 assert_return(e, -EINVAL);
1080 assert_return(e = event_resolve(e), -ENOPKG);
1081 assert_return(SIGNAL_VALID(sig), -EINVAL);
1082 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1083 assert_return(!event_pid_changed(e), -ECHILD);
1084
1085 if (!callback)
1086 callback = signal_exit_callback;
1087
1088 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1089 if (r != 0)
1090 return -r;
1091
1092 if (!sigismember(&ss, sig))
1093 return -EBUSY;
1094
1095 if (!e->signal_sources) {
1096 e->signal_sources = new0(sd_event_source*, _NSIG);
1097 if (!e->signal_sources)
1098 return -ENOMEM;
1099 } else if (e->signal_sources[sig])
1100 return -EBUSY;
1101
1102 s = source_new(e, !ret, SOURCE_SIGNAL);
1103 if (!s)
1104 return -ENOMEM;
1105
1106 s->signal.sig = sig;
1107 s->signal.callback = callback;
1108 s->userdata = userdata;
1109 s->enabled = SD_EVENT_ON;
1110
1111 e->signal_sources[sig] = s;
1112
1113 r = event_make_signal_data(e, sig, &d);
1114 if (r < 0)
1115 return r;
1116
1117 /* Use the signal name as description for the event source by default */
1118 (void) sd_event_source_set_description(s, signal_to_string(sig));
1119
1120 if (ret)
1121 *ret = s;
1122 TAKE_PTR(s);
1123
1124 return 0;
1125 }
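/* Usage sketch (illustrative only, not part of this file): handling SIGTERM. As checked above, the
 * signal must already be blocked in the calling thread, otherwise -EBUSY is returned.
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGTERM);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *
 *     r = sd_event_add_signal(e, NULL, SIGTERM, NULL, NULL);
 *     // callback == NULL falls back to signal_exit_callback(), i.e. the loop exits on SIGTERM.
 */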
1126
1127 _public_ int sd_event_add_child(
1128 sd_event *e,
1129 sd_event_source **ret,
1130 pid_t pid,
1131 int options,
1132 sd_event_child_handler_t callback,
1133 void *userdata) {
1134
1135 _cleanup_(source_freep) sd_event_source *s = NULL;
1136 int r;
1137
1138 assert_return(e, -EINVAL);
1139 assert_return(e = event_resolve(e), -ENOPKG);
1140 assert_return(pid > 1, -EINVAL);
1141 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1142 assert_return(options != 0, -EINVAL);
1143 assert_return(callback, -EINVAL);
1144 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1145 assert_return(!event_pid_changed(e), -ECHILD);
1146
1147 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1148 if (r < 0)
1149 return r;
1150
1151 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1152 return -EBUSY;
1153
1154 s = source_new(e, !ret, SOURCE_CHILD);
1155 if (!s)
1156 return -ENOMEM;
1157
1158 s->child.pid = pid;
1159 s->child.options = options;
1160 s->child.callback = callback;
1161 s->userdata = userdata;
1162 s->enabled = SD_EVENT_ONESHOT;
1163
1164 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1165 if (r < 0)
1166 return r;
1167
1168 e->n_enabled_child_sources++;
1169
1170 r = event_make_signal_data(e, SIGCHLD, NULL);
1171 if (r < 0) {
1172 e->n_enabled_child_sources--;
1173 return r;
1174 }
1175
1176 e->need_process_child = true;
1177
1178 if (ret)
1179 *ret = s;
1180 TAKE_PTR(s);
1181
1182 return 0;
1183 }
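/* Usage sketch (illustrative only, not part of this file): waiting for a forked child. SIGCHLD must
 * be blocked before the child is created, since the implementation above relies on a signalfd for
 * SIGCHLD; "child_pid" is a hypothetical pid obtained from fork().
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_code is CLD_EXITED/CLD_KILLED/..., si->si_status carries the exit status
 *             return 0;
 *     }
 *
 *     sigset_t mask;
 *     sigemptyset(&mask);
 *     sigaddset(&mask, SIGCHLD);
 *     assert_se(sigprocmask(SIG_BLOCK, &mask, NULL) >= 0);
 *
 *     r = sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL);
 */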
1184
1185 _public_ int sd_event_add_defer(
1186 sd_event *e,
1187 sd_event_source **ret,
1188 sd_event_handler_t callback,
1189 void *userdata) {
1190
1191 _cleanup_(source_freep) sd_event_source *s = NULL;
1192 int r;
1193
1194 assert_return(e, -EINVAL);
1195 assert_return(e = event_resolve(e), -ENOPKG);
1196 assert_return(callback, -EINVAL);
1197 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1198 assert_return(!event_pid_changed(e), -ECHILD);
1199
1200 s = source_new(e, !ret, SOURCE_DEFER);
1201 if (!s)
1202 return -ENOMEM;
1203
1204 s->defer.callback = callback;
1205 s->userdata = userdata;
1206 s->enabled = SD_EVENT_ONESHOT;
1207
1208 r = source_set_pending(s, true);
1209 if (r < 0)
1210 return r;
1211
1212 if (ret)
1213 *ret = s;
1214 TAKE_PTR(s);
1215
1216 return 0;
1217 }
1218
1219 _public_ int sd_event_add_post(
1220 sd_event *e,
1221 sd_event_source **ret,
1222 sd_event_handler_t callback,
1223 void *userdata) {
1224
1225 _cleanup_(source_freep) sd_event_source *s = NULL;
1226 int r;
1227
1228 assert_return(e, -EINVAL);
1229 assert_return(e = event_resolve(e), -ENOPKG);
1230 assert_return(callback, -EINVAL);
1231 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1232 assert_return(!event_pid_changed(e), -ECHILD);
1233
1234 r = set_ensure_allocated(&e->post_sources, NULL);
1235 if (r < 0)
1236 return r;
1237
1238 s = source_new(e, !ret, SOURCE_POST);
1239 if (!s)
1240 return -ENOMEM;
1241
1242 s->post.callback = callback;
1243 s->userdata = userdata;
1244 s->enabled = SD_EVENT_ON;
1245
1246 r = set_put(e->post_sources, s);
1247 if (r < 0)
1248 return r;
1249
1250 if (ret)
1251 *ret = s;
1252 TAKE_PTR(s);
1253
1254 return 0;
1255 }
1256
1257 _public_ int sd_event_add_exit(
1258 sd_event *e,
1259 sd_event_source **ret,
1260 sd_event_handler_t callback,
1261 void *userdata) {
1262
1263 _cleanup_(source_freep) sd_event_source *s = NULL;
1264 int r;
1265
1266 assert_return(e, -EINVAL);
1267 assert_return(e = event_resolve(e), -ENOPKG);
1268 assert_return(callback, -EINVAL);
1269 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1270 assert_return(!event_pid_changed(e), -ECHILD);
1271
1272 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1273 if (r < 0)
1274 return r;
1275
1276 s = source_new(e, !ret, SOURCE_EXIT);
1277 if (!s)
1278 return -ENOMEM;
1279
1280 s->exit.callback = callback;
1281 s->userdata = userdata;
1282 s->exit.prioq_index = PRIOQ_IDX_NULL;
1283 s->enabled = SD_EVENT_ONESHOT;
1284
1285 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1286 if (r < 0)
1287 return r;
1288
1289 if (ret)
1290 *ret = s;
1291 TAKE_PTR(s);
1292
1293 return 0;
1294 }
1295
1296 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1297 assert(e);
1298
1299 if (!d)
1300 return;
1301
1302 assert(hashmap_isempty(d->inodes));
1303 assert(hashmap_isempty(d->wd));
1304
1305 if (d->buffer_filled > 0)
1306 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1307
1308 hashmap_free(d->inodes);
1309 hashmap_free(d->wd);
1310
1311 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1312
1313 if (d->fd >= 0) {
1314 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1315 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1316
1317 safe_close(d->fd);
1318 }
1319 free(d);
1320 }
1321
1322 static int event_make_inotify_data(
1323 sd_event *e,
1324 int64_t priority,
1325 struct inotify_data **ret) {
1326
1327 _cleanup_close_ int fd = -1;
1328 struct inotify_data *d;
1329 struct epoll_event ev;
1330 int r;
1331
1332 assert(e);
1333
1334 d = hashmap_get(e->inotify_data, &priority);
1335 if (d) {
1336 if (ret)
1337 *ret = d;
1338 return 0;
1339 }
1340
1341 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1342 if (fd < 0)
1343 return -errno;
1344
1345 fd = fd_move_above_stdio(fd);
1346
1347 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1348 if (r < 0)
1349 return r;
1350
1351 d = new(struct inotify_data, 1);
1352 if (!d)
1353 return -ENOMEM;
1354
1355 *d = (struct inotify_data) {
1356 .wakeup = WAKEUP_INOTIFY_DATA,
1357 .fd = TAKE_FD(fd),
1358 .priority = priority,
1359 };
1360
1361 r = hashmap_put(e->inotify_data, &d->priority, d);
1362 if (r < 0) {
1363 d->fd = safe_close(d->fd);
1364 free(d);
1365 return r;
1366 }
1367
1368 ev = (struct epoll_event) {
1369 .events = EPOLLIN,
1370 .data.ptr = d,
1371 };
1372
1373 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1374 r = -errno;
1375 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1376 * remove the fd from the epoll first, which we don't want as we couldn't
1377 * add it in the first place. */
1378 event_free_inotify_data(e, d);
1379 return r;
1380 }
1381
1382 if (ret)
1383 *ret = d;
1384
1385 return 1;
1386 }
1387
1388 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1389 int r;
1390
1391 assert(x);
1392 assert(y);
1393
1394 r = CMP(x->dev, y->dev);
1395 if (r != 0)
1396 return r;
1397
1398 return CMP(x->ino, y->ino);
1399 }
1400
1401 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1402 assert(d);
1403
1404 siphash24_compress(&d->dev, sizeof(d->dev), state);
1405 siphash24_compress(&d->ino, sizeof(d->ino), state);
1406 }
1407
1408 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1409
1410 static void event_free_inode_data(
1411 sd_event *e,
1412 struct inode_data *d) {
1413
1414 assert(e);
1415
1416 if (!d)
1417 return;
1418
1419 assert(!d->event_sources);
1420
1421 if (d->fd >= 0) {
1422 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1423 safe_close(d->fd);
1424 }
1425
1426 if (d->inotify_data) {
1427
1428 if (d->wd >= 0) {
1429 if (d->inotify_data->fd >= 0) {
1430 /* So here's a problem. At the time this runs the watch descriptor might already be
1431 * invalidated, because an IN_IGNORED event might be queued at the very moment we enter
1432 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1433 * likely case to happen. */
1434
1435 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1436 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1437 }
1438
1439 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1440 }
1441
1442 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1443 }
1444
1445 free(d);
1446 }
1447
1448 static void event_gc_inode_data(
1449 sd_event *e,
1450 struct inode_data *d) {
1451
1452 struct inotify_data *inotify_data;
1453
1454 assert(e);
1455
1456 if (!d)
1457 return;
1458
1459 if (d->event_sources)
1460 return;
1461
1462 inotify_data = d->inotify_data;
1463 event_free_inode_data(e, d);
1464
1465 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1466 event_free_inotify_data(e, inotify_data);
1467 }
1468
1469 static int event_make_inode_data(
1470 sd_event *e,
1471 struct inotify_data *inotify_data,
1472 dev_t dev,
1473 ino_t ino,
1474 struct inode_data **ret) {
1475
1476 struct inode_data *d, key;
1477 int r;
1478
1479 assert(e);
1480 assert(inotify_data);
1481
1482 key = (struct inode_data) {
1483 .ino = ino,
1484 .dev = dev,
1485 };
1486
1487 d = hashmap_get(inotify_data->inodes, &key);
1488 if (d) {
1489 if (ret)
1490 *ret = d;
1491
1492 return 0;
1493 }
1494
1495 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1496 if (r < 0)
1497 return r;
1498
1499 d = new(struct inode_data, 1);
1500 if (!d)
1501 return -ENOMEM;
1502
1503 *d = (struct inode_data) {
1504 .dev = dev,
1505 .ino = ino,
1506 .wd = -1,
1507 .fd = -1,
1508 .inotify_data = inotify_data,
1509 };
1510
1511 r = hashmap_put(inotify_data->inodes, d, d);
1512 if (r < 0) {
1513 free(d);
1514 return r;
1515 }
1516
1517 if (ret)
1518 *ret = d;
1519
1520 return 1;
1521 }
1522
1523 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1524 bool excl_unlink = true;
1525 uint32_t combined = 0;
1526 sd_event_source *s;
1527
1528 assert(d);
1529
1530 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1531 * the IN_EXCL_UNLINK flag is ANDed instead.
1532 *
1533 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1534 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1535 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1536 * events we don't care for client-side. */
1537
1538 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1539
1540 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1541 excl_unlink = false;
1542
1543 combined |= s->inotify.mask;
1544 }
1545
1546 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1547 }
1548
1549 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1550 uint32_t combined_mask;
1551 int wd, r;
1552
1553 assert(d);
1554 assert(d->fd >= 0);
1555
1556 combined_mask = inode_data_determine_mask(d);
1557
1558 if (d->wd >= 0 && combined_mask == d->combined_mask)
1559 return 0;
1560
1561 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1562 if (r < 0)
1563 return r;
1564
1565 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1566 if (wd < 0)
1567 return -errno;
1568
1569 if (d->wd < 0) {
1570 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1571 if (r < 0) {
1572 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1573 return r;
1574 }
1575
1576 d->wd = wd;
1577
1578 } else if (d->wd != wd) {
1579
1580 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1581 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1582 return -EINVAL;
1583 }
1584
1585 d->combined_mask = combined_mask;
1586 return 1;
1587 }
1588
1589 _public_ int sd_event_add_inotify(
1590 sd_event *e,
1591 sd_event_source **ret,
1592 const char *path,
1593 uint32_t mask,
1594 sd_event_inotify_handler_t callback,
1595 void *userdata) {
1596
1597 struct inotify_data *inotify_data = NULL;
1598 struct inode_data *inode_data = NULL;
1599 _cleanup_close_ int fd = -1;
1600 _cleanup_(source_freep) sd_event_source *s = NULL;
1601 struct stat st;
1602 int r;
1603
1604 assert_return(e, -EINVAL);
1605 assert_return(e = event_resolve(e), -ENOPKG);
1606 assert_return(path, -EINVAL);
1607 assert_return(callback, -EINVAL);
1608 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1609 assert_return(!event_pid_changed(e), -ECHILD);
1610
1611 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1612 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1613 * the user can't use them for us. */
1614 if (mask & IN_MASK_ADD)
1615 return -EINVAL;
1616
1617 fd = open(path, O_PATH|O_CLOEXEC|
1618 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1619 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1620 if (fd < 0)
1621 return -errno;
1622
1623 if (fstat(fd, &st) < 0)
1624 return -errno;
1625
1626 s = source_new(e, !ret, SOURCE_INOTIFY);
1627 if (!s)
1628 return -ENOMEM;
1629
1630 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1631 s->inotify.mask = mask;
1632 s->inotify.callback = callback;
1633 s->userdata = userdata;
1634
1635 /* Allocate an inotify object for this priority, and an inode object within it */
1636 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1637 if (r < 0)
1638 return r;
1639
1640 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1641 if (r < 0) {
1642 event_free_inotify_data(e, inotify_data);
1643 return r;
1644 }
1645
1646 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1647 * the event source, until then, for which we need the original inode. */
1648 if (inode_data->fd < 0) {
1649 inode_data->fd = TAKE_FD(fd);
1650 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1651 }
1652
1653 /* Link our event source to the inode data object */
1654 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1655 s->inotify.inode_data = inode_data;
1656
1657 /* Actually realize the watch now */
1658 r = inode_data_realize_watch(e, inode_data);
1659 if (r < 0)
1660 return r;
1661
1662 (void) sd_event_source_set_description(s, path);
1663
1664 if (ret)
1665 *ret = s;
1666 TAKE_PTR(s);
1667
1668 return 0;
1669 }
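/* Usage sketch (illustrative only, not part of this file): watching a directory for newly created
 * files. The handler follows the sd_event_inotify_handler_t signature; "/run/mydir" is a hypothetical
 * path. IN_MASK_ADD is refused above since watches on the same inode are coalesced internally.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             // ev->mask and ev->name describe the event, as with a raw inotify fd
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/run/mydir", IN_CREATE | IN_MOVED_TO, on_inotify, NULL);
 */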
1670
1671 static sd_event_source* event_source_free(sd_event_source *s) {
1672 if (!s)
1673 return NULL;
1674
1675 /* Here's a special hack: when we are called from a
1676 * dispatch handler we won't free the event source
1677 * immediately, but we will detach the fd from the
1678 * epoll. This way it is safe for the caller to unref
1679 * the event source and immediately close the fd, but
1680 * we still retain a valid event source object after
1681 * the callback. */
1682
1683 if (s->dispatching) {
1684 if (s->type == SOURCE_IO)
1685 source_io_unregister(s);
1686
1687 source_disconnect(s);
1688 } else
1689 source_free(s);
1690
1691 return NULL;
1692 }
1693
1694 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1695
1696 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1697 assert_return(s, -EINVAL);
1698 assert_return(!event_pid_changed(s->event), -ECHILD);
1699
1700 return free_and_strdup(&s->description, description);
1701 }
1702
1703 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1704 assert_return(s, -EINVAL);
1705 assert_return(description, -EINVAL);
1706 assert_return(!event_pid_changed(s->event), -ECHILD);
1707
1708 if (!s->description)
1709 return -ENXIO;
1710
1711 *description = s->description;
1712 return 0;
1713 }
1714
1715 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1716 assert_return(s, NULL);
1717
1718 return s->event;
1719 }
1720
1721 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1722 assert_return(s, -EINVAL);
1723 assert_return(s->type != SOURCE_EXIT, -EDOM);
1724 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1725 assert_return(!event_pid_changed(s->event), -ECHILD);
1726
1727 return s->pending;
1728 }
1729
1730 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1731 assert_return(s, -EINVAL);
1732 assert_return(s->type == SOURCE_IO, -EDOM);
1733 assert_return(!event_pid_changed(s->event), -ECHILD);
1734
1735 return s->io.fd;
1736 }
1737
1738 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1739 int r;
1740
1741 assert_return(s, -EINVAL);
1742 assert_return(fd >= 0, -EBADF);
1743 assert_return(s->type == SOURCE_IO, -EDOM);
1744 assert_return(!event_pid_changed(s->event), -ECHILD);
1745
1746 if (s->io.fd == fd)
1747 return 0;
1748
1749 if (s->enabled == SD_EVENT_OFF) {
1750 s->io.fd = fd;
1751 s->io.registered = false;
1752 } else {
1753 int saved_fd;
1754
1755 saved_fd = s->io.fd;
1756 assert(s->io.registered);
1757
1758 s->io.fd = fd;
1759 s->io.registered = false;
1760
1761 r = source_io_register(s, s->enabled, s->io.events);
1762 if (r < 0) {
1763 s->io.fd = saved_fd;
1764 s->io.registered = true;
1765 return r;
1766 }
1767
1768 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1769 }
1770
1771 return 0;
1772 }
1773
1774 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1775 assert_return(s, -EINVAL);
1776 assert_return(s->type == SOURCE_IO, -EDOM);
1777
1778 return s->io.owned;
1779 }
1780
1781 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1782 assert_return(s, -EINVAL);
1783 assert_return(s->type == SOURCE_IO, -EDOM);
1784
1785 s->io.owned = own;
1786 return 0;
1787 }
1788
1789 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1790 assert_return(s, -EINVAL);
1791 assert_return(events, -EINVAL);
1792 assert_return(s->type == SOURCE_IO, -EDOM);
1793 assert_return(!event_pid_changed(s->event), -ECHILD);
1794
1795 *events = s->io.events;
1796 return 0;
1797 }
1798
1799 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1800 int r;
1801
1802 assert_return(s, -EINVAL);
1803 assert_return(s->type == SOURCE_IO, -EDOM);
1804 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1805 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1806 assert_return(!event_pid_changed(s->event), -ECHILD);
1807
1808 /* edge-triggered updates are never skipped, so we can reset edges */
1809 if (s->io.events == events && !(events & EPOLLET))
1810 return 0;
1811
1812 r = source_set_pending(s, false);
1813 if (r < 0)
1814 return r;
1815
1816 if (s->enabled != SD_EVENT_OFF) {
1817 r = source_io_register(s, s->enabled, events);
1818 if (r < 0)
1819 return r;
1820 }
1821
1822 s->io.events = events;
1823
1824 return 0;
1825 }
1826
1827 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1828 assert_return(s, -EINVAL);
1829 assert_return(revents, -EINVAL);
1830 assert_return(s->type == SOURCE_IO, -EDOM);
1831 assert_return(s->pending, -ENODATA);
1832 assert_return(!event_pid_changed(s->event), -ECHILD);
1833
1834 *revents = s->io.revents;
1835 return 0;
1836 }
1837
1838 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1839 assert_return(s, -EINVAL);
1840 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1841 assert_return(!event_pid_changed(s->event), -ECHILD);
1842
1843 return s->signal.sig;
1844 }
1845
1846 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1847 assert_return(s, -EINVAL);
1848 assert_return(!event_pid_changed(s->event), -ECHILD);
1849
1850 *priority = s->priority;
1851 return 0;
1852 }
1853
1854 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1855 bool rm_inotify = false, rm_inode = false;
1856 struct inotify_data *new_inotify_data = NULL;
1857 struct inode_data *new_inode_data = NULL;
1858 int r;
1859
1860 assert_return(s, -EINVAL);
1861 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1862 assert_return(!event_pid_changed(s->event), -ECHILD);
1863
1864 if (s->priority == priority)
1865 return 0;
1866
1867 if (s->type == SOURCE_INOTIFY) {
1868 struct inode_data *old_inode_data;
1869
1870 assert(s->inotify.inode_data);
1871 old_inode_data = s->inotify.inode_data;
1872
1873 /* We need the original fd to change the priority. If we don't have it, we can't change the
1874 * priority anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1875 * events we allow priority changes only until the first following iteration. */
1876 if (old_inode_data->fd < 0)
1877 return -EOPNOTSUPP;
1878
1879 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1880 if (r < 0)
1881 return r;
1882 rm_inotify = r > 0;
1883
1884 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1885 if (r < 0)
1886 goto fail;
1887 rm_inode = r > 0;
1888
1889 if (new_inode_data->fd < 0) {
1890 /* Duplicate the fd for the new inode object if we don't have any yet */
1891 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1892 if (new_inode_data->fd < 0) {
1893 r = -errno;
1894 goto fail;
1895 }
1896
1897 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1898 }
1899
1900 /* Move the event source to the new inode data structure */
1901 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1902 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1903 s->inotify.inode_data = new_inode_data;
1904
1905 /* Now create the new watch */
1906 r = inode_data_realize_watch(s->event, new_inode_data);
1907 if (r < 0) {
1908 /* Move it back */
1909 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1910 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1911 s->inotify.inode_data = old_inode_data;
1912 goto fail;
1913 }
1914
1915 s->priority = priority;
1916
1917 event_gc_inode_data(s->event, old_inode_data);
1918
1919 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1920 struct signal_data *old, *d;
1921
1922 /* Move us from the signalfd belonging to the old
1923 * priority to the signalfd of the new priority */
1924
1925 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1926
1927 s->priority = priority;
1928
1929 r = event_make_signal_data(s->event, s->signal.sig, &d);
1930 if (r < 0) {
1931 s->priority = old->priority;
1932 return r;
1933 }
1934
1935 event_unmask_signal_data(s->event, old, s->signal.sig);
1936 } else
1937 s->priority = priority;
1938
1939 if (s->pending)
1940 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1941
1942 if (s->prepare)
1943 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1944
1945 if (s->type == SOURCE_EXIT)
1946 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1947
1948 return 0;
1949
1950 fail:
1951 if (rm_inode)
1952 event_free_inode_data(s->event, new_inode_data);
1953
1954 if (rm_inotify)
1955 event_free_inotify_data(s->event, new_inotify_data);
1956
1957 return r;
1958 }
1959
1960 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1961 assert_return(s, -EINVAL);
1962 assert_return(!event_pid_changed(s->event), -ECHILD);
1963
1964 if (m)
1965 *m = s->enabled;
1966 return s->enabled != SD_EVENT_OFF;
1967 }
1968
1969 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1970 int r;
1971
1972 assert_return(s, -EINVAL);
1973 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1974 assert_return(!event_pid_changed(s->event), -ECHILD);
1975
1976 /* If we are dead anyway, we are fine with turning off
1977 * sources, but everything else needs to fail. */
1978 if (s->event->state == SD_EVENT_FINISHED)
1979 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1980
1981 if (s->enabled == m)
1982 return 0;
1983
1984 if (m == SD_EVENT_OFF) {
1985
1986 /* Unset the pending flag when this event source is disabled */
1987 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1988 r = source_set_pending(s, false);
1989 if (r < 0)
1990 return r;
1991 }
1992
1993 switch (s->type) {
1994
1995 case SOURCE_IO:
1996 source_io_unregister(s);
1997 s->enabled = m;
1998 break;
1999
2000 case SOURCE_TIME_REALTIME:
2001 case SOURCE_TIME_BOOTTIME:
2002 case SOURCE_TIME_MONOTONIC:
2003 case SOURCE_TIME_REALTIME_ALARM:
2004 case SOURCE_TIME_BOOTTIME_ALARM: {
2005 struct clock_data *d;
2006
2007 s->enabled = m;
2008 d = event_get_clock_data(s->event, s->type);
2009 assert(d);
2010
2011 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2012 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2013 d->needs_rearm = true;
2014 break;
2015 }
2016
2017 case SOURCE_SIGNAL:
2018 s->enabled = m;
2019
2020 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2021 break;
2022
2023 case SOURCE_CHILD:
2024 s->enabled = m;
2025
2026 assert(s->event->n_enabled_child_sources > 0);
2027 s->event->n_enabled_child_sources--;
2028
2029 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2030 break;
2031
2032 case SOURCE_EXIT:
2033 s->enabled = m;
2034 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2035 break;
2036
2037 case SOURCE_DEFER:
2038 case SOURCE_POST:
2039 case SOURCE_INOTIFY:
2040 s->enabled = m;
2041 break;
2042
2043 default:
2044 assert_not_reached("Wut? I shouldn't exist.");
2045 }
2046
2047 } else {
2048
2049 /* Unset the pending flag when this event source is enabled */
2050 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2051 r = source_set_pending(s, false);
2052 if (r < 0)
2053 return r;
2054 }
2055
2056 switch (s->type) {
2057
2058 case SOURCE_IO:
2059 r = source_io_register(s, m, s->io.events);
2060 if (r < 0)
2061 return r;
2062
2063 s->enabled = m;
2064 break;
2065
2066 case SOURCE_TIME_REALTIME:
2067 case SOURCE_TIME_BOOTTIME:
2068 case SOURCE_TIME_MONOTONIC:
2069 case SOURCE_TIME_REALTIME_ALARM:
2070 case SOURCE_TIME_BOOTTIME_ALARM: {
2071 struct clock_data *d;
2072
2073 s->enabled = m;
2074 d = event_get_clock_data(s->event, s->type);
2075 assert(d);
2076
2077 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2078 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2079 d->needs_rearm = true;
2080 break;
2081 }
2082
2083 case SOURCE_SIGNAL:
2084
2085 s->enabled = m;
2086
2087 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2088 if (r < 0) {
2089 s->enabled = SD_EVENT_OFF;
2090 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2091 return r;
2092 }
2093
2094 break;
2095
2096 case SOURCE_CHILD:
2097
2098 if (s->enabled == SD_EVENT_OFF)
2099 s->event->n_enabled_child_sources++;
2100
2101 s->enabled = m;
2102
2103 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2104 if (r < 0) {
2105 s->enabled = SD_EVENT_OFF;
2106 s->event->n_enabled_child_sources--;
2107 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2108 return r;
2109 }
2110
2111 break;
2112
2113 case SOURCE_EXIT:
2114 s->enabled = m;
2115 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2116 break;
2117
2118 case SOURCE_DEFER:
2119 case SOURCE_POST:
2120 case SOURCE_INOTIFY:
2121 s->enabled = m;
2122 break;
2123
2124 default:
2125 assert_not_reached("Wut? I shouldn't exist.");
2126 }
2127 }
2128
2129 if (s->pending)
2130 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2131
2132 if (s->prepare)
2133 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2134
2135 return 0;
2136 }
2137
2138 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2139 assert_return(s, -EINVAL);
2140 assert_return(usec, -EINVAL);
2141 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2142 assert_return(!event_pid_changed(s->event), -ECHILD);
2143
2144 *usec = s->time.next;
2145 return 0;
2146 }
2147
2148 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2149 struct clock_data *d;
2150 int r;
2151
2152 assert_return(s, -EINVAL);
2153 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2154 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2155 assert_return(!event_pid_changed(s->event), -ECHILD);
2156
2157 r = source_set_pending(s, false);
2158 if (r < 0)
2159 return r;
2160
2161 s->time.next = usec;
2162
2163 d = event_get_clock_data(s->event, s->type);
2164 assert(d);
2165
2166 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2167 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2168 d->needs_rearm = true;
2169
2170 return 0;
2171 }
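
/*
 * Illustrative sketch only: re-arming a timer source from inside its own callback yields a
 * repeating timer. The callback name on_timer and the 10s interval are hypothetical:
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             (void) sd_event_source_set_time(s, usec + 10 * USEC_PER_SEC);
 *             (void) sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *             return 0;
 *     }
 */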
2172
2173 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2174 assert_return(s, -EINVAL);
2175 assert_return(usec, -EINVAL);
2176 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2177 assert_return(!event_pid_changed(s->event), -ECHILD);
2178
2179 *usec = s->time.accuracy;
2180 return 0;
2181 }
2182
2183 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2184 struct clock_data *d;
2185 int r;
2186
2187 assert_return(s, -EINVAL);
2188 assert_return(usec != (uint64_t) -1, -EINVAL);
2189 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2190 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2191 assert_return(!event_pid_changed(s->event), -ECHILD);
2192
2193 r = source_set_pending(s, false);
2194 if (r < 0)
2195 return r;
2196
2197 if (usec == 0)
2198 usec = DEFAULT_ACCURACY_USEC;
2199
2200 s->time.accuracy = usec;
2201
2202 d = event_get_clock_data(s->event, s->type);
2203 assert(d);
2204
2205 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2206 d->needs_rearm = true;
2207
2208 return 0;
2209 }
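
/*
 * Illustrative note: a generous accuracy gives the loop room to coalesce wakeups via
 * sleep_between() below. E.g. a housekeeping timer that may fire anywhere within a minute of
 * its deadline could use:
 *
 *     (void) sd_event_source_set_time_accuracy(s, USEC_PER_MINUTE);
 */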
2210
2211 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2212 assert_return(s, -EINVAL);
2213 assert_return(clock, -EINVAL);
2214 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2215 assert_return(!event_pid_changed(s->event), -ECHILD);
2216
2217 *clock = event_source_type_to_clock(s->type);
2218 return 0;
2219 }
2220
2221 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2222 assert_return(s, -EINVAL);
2223 assert_return(pid, -EINVAL);
2224 assert_return(s->type == SOURCE_CHILD, -EDOM);
2225 assert_return(!event_pid_changed(s->event), -ECHILD);
2226
2227 *pid = s->child.pid;
2228 return 0;
2229 }
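
/*
 * Illustrative sketch of setting up a child source in the first place (on_child is a
 * hypothetical callback; SIGCHLD must be blocked in all threads before the source is added):
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_pid and si->si_status describe the reported state change
 *             return 0;
 *     }
 *
 *     (void) sd_event_add_child(e, &child_source, pid, WEXITED, on_child, NULL);
 */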
2230
2231 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2232 assert_return(s, -EINVAL);
2233 assert_return(mask, -EINVAL);
2234 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2235 assert_return(!event_pid_changed(s->event), -ECHILD);
2236
2237 *mask = s->inotify.mask;
2238 return 0;
2239 }
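
/*
 * Illustrative sketch (hypothetical names and path) of creating an inotify source whose mask
 * is queried above:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             // ev->mask and ev->name describe what changed
 *             return 0;
 *     }
 *
 *     (void) sd_event_add_inotify(e, &src, "/run/foo", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 */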
2240
2241 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2242 int r;
2243
2244 assert_return(s, -EINVAL);
2245 assert_return(s->type != SOURCE_EXIT, -EDOM);
2246 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2247 assert_return(!event_pid_changed(s->event), -ECHILD);
2248
2249 if (s->prepare == callback)
2250 return 0;
2251
2252 if (callback && s->prepare) {
2253 s->prepare = callback;
2254 return 0;
2255 }
2256
2257 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2258 if (r < 0)
2259 return r;
2260
2261 s->prepare = callback;
2262
2263 if (callback) {
2264 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2265 if (r < 0)
2266 return r;
2267 } else
2268 prioq_remove(s->event->prepare, s, &s->prepare_index);
2269
2270 return 0;
2271 }
2272
2273 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2274 assert_return(s, NULL);
2275
2276 return s->userdata;
2277 }
2278
2279 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2280 void *ret;
2281
2282 assert_return(s, NULL);
2283
2284 ret = s->userdata;
2285 s->userdata = userdata;
2286
2287 return ret;
2288 }
2289
2290 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2291 usec_t c;
2292 assert(e);
2293 assert(a <= b);
2294
2295 if (a <= 0)
2296 return 0;
2297 if (a >= USEC_INFINITY)
2298 return USEC_INFINITY;
2299
2300 if (b <= a + 1)
2301 return a;
2302
2303 initialize_perturb(e);
2304
2305 /*
2306 Find a good time to wake up again between times a and b. We
2307 have two goals here:
2308
2309 a) We want to wake up as seldom as possible, hence prefer
2310 later times over earlier times.
2311
2312 b) But if we have to wake up, then let's make sure to
2313 dispatch as much as possible on the entire system.
2314
2315 We implement this by waking up everywhere at the same time
2316 within any given minute if we can, synchronised via the
2317 perturbation value determined from the boot ID. If we can't,
2318 then we try to find the same spot in every 10s, then every 1s and
2319 then every 250ms step. Otherwise, we pick the last possible time
2320 to wake up.
2321 */
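/*
Worked example with made-up numbers (not derived from a real boot ID): with
e->perturb == 7 * USEC_PER_SEC, a == 40s and b == 130s, the first probe is
c = (130s / 60s) * 60s + 7s = 127s, which already lies within [a, b), so we
wake at 127s and never need the finer 10s/1s/250ms steps.
*/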
2322
2323 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2324 if (c >= b) {
2325 if (_unlikely_(c < USEC_PER_MINUTE))
2326 return b;
2327
2328 c -= USEC_PER_MINUTE;
2329 }
2330
2331 if (c >= a)
2332 return c;
2333
2334 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2335 if (c >= b) {
2336 if (_unlikely_(c < USEC_PER_SEC*10))
2337 return b;
2338
2339 c -= USEC_PER_SEC*10;
2340 }
2341
2342 if (c >= a)
2343 return c;
2344
2345 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2346 if (c >= b) {
2347 if (_unlikely_(c < USEC_PER_SEC))
2348 return b;
2349
2350 c -= USEC_PER_SEC;
2351 }
2352
2353 if (c >= a)
2354 return c;
2355
2356 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2357 if (c >= b) {
2358 if (_unlikely_(c < USEC_PER_MSEC*250))
2359 return b;
2360
2361 c -= USEC_PER_MSEC*250;
2362 }
2363
2364 if (c >= a)
2365 return c;
2366
2367 return b;
2368 }
2369
2370 static int event_arm_timer(
2371 sd_event *e,
2372 struct clock_data *d) {
2373
2374 struct itimerspec its = {};
2375 sd_event_source *a, *b;
2376 usec_t t;
2377 int r;
2378
2379 assert(e);
2380 assert(d);
2381
2382 if (!d->needs_rearm)
2383 return 0;
2384 else
2385 d->needs_rearm = false;
2386
2387 a = prioq_peek(d->earliest);
2388 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2389
2390 if (d->fd < 0)
2391 return 0;
2392
2393 if (d->next == USEC_INFINITY)
2394 return 0;
2395
2396 /* disarm */
2397 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2398 if (r < 0)
2399 return -errno;
2400
2401 d->next = USEC_INFINITY;
2402 return 0;
2403 }
2404
2405 b = prioq_peek(d->latest);
2406 assert_se(b && b->enabled != SD_EVENT_OFF);
2407
2408 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2409 if (d->next == t)
2410 return 0;
2411
2412 assert_se(d->fd >= 0);
2413
2414 if (t == 0) {
2415 /* We don't want to disarm here, just set the timer to some time looooong ago. */
2416 its.it_value.tv_sec = 0;
2417 its.it_value.tv_nsec = 1;
2418 } else
2419 timespec_store(&its.it_value, t);
2420
2421 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2422 if (r < 0)
2423 return -errno;
2424
2425 d->next = t;
2426 return 0;
2427 }
2428
2429 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2430 assert(e);
2431 assert(s);
2432 assert(s->type == SOURCE_IO);
2433
2434 /* If the event source was already pending, we just OR in the
2435 * new revents, otherwise we reset the value. The ORing is
2436 * necessary to handle EPOLLONESHOT events properly where
2437 * readability might happen independently of writability, and
2438 * we need to keep track of both */
2439
2440 if (s->pending)
2441 s->io.revents |= revents;
2442 else
2443 s->io.revents = revents;
2444
2445 return source_set_pending(s, true);
2446 }
2447
2448 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2449 uint64_t x;
2450 ssize_t ss;
2451
2452 assert(e);
2453 assert(fd >= 0);
2454
2455 assert_return(events == EPOLLIN, -EIO);
2456
2457 ss = read(fd, &x, sizeof(x));
2458 if (ss < 0) {
2459 if (IN_SET(errno, EAGAIN, EINTR))
2460 return 0;
2461
2462 return -errno;
2463 }
2464
2465 if (_unlikely_(ss != sizeof(x)))
2466 return -EIO;
2467
2468 if (next)
2469 *next = USEC_INFINITY;
2470
2471 return 0;
2472 }
2473
2474 static int process_timer(
2475 sd_event *e,
2476 usec_t n,
2477 struct clock_data *d) {
2478
2479 sd_event_source *s;
2480 int r;
2481
2482 assert(e);
2483 assert(d);
2484
2485 for (;;) {
2486 s = prioq_peek(d->earliest);
2487 if (!s ||
2488 s->time.next > n ||
2489 s->enabled == SD_EVENT_OFF ||
2490 s->pending)
2491 break;
2492
2493 r = source_set_pending(s, true);
2494 if (r < 0)
2495 return r;
2496
2497 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2498 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2499 d->needs_rearm = true;
2500 }
2501
2502 return 0;
2503 }
2504
2505 static int process_child(sd_event *e) {
2506 sd_event_source *s;
2507 Iterator i;
2508 int r;
2509
2510 assert(e);
2511
2512 e->need_process_child = false;
2513
2514 /*
2515 So, this is ugly. We iteratively invoke waitid() with P_PID
2516 + WNOHANG for each PID we wait for, instead of using
2517 P_ALL. This is because we only want to get child
2518 information of very specific child processes, and not all
2519 of them. We might not have processed the SIGCHLD event of a
2520 previous invocation and we don't want to maintain an
2521 unbounded *per-child* event queue, hence we really don't
2522 want anything flushed out of the kernel's queue that we
2523 don't care about. Since this is O(n) this means that if you
2524 have a lot of processes you probably want to handle SIGCHLD
2525 yourself.
2526
2527 We do not reap the children here (by using WNOWAIT), this
2528 is only done after the event source is dispatched so that
2529 the callback still sees the process as a zombie.
2530 */
2531
2532 HASHMAP_FOREACH(s, e->child_sources, i) {
2533 assert(s->type == SOURCE_CHILD);
2534
2535 if (s->pending)
2536 continue;
2537
2538 if (s->enabled == SD_EVENT_OFF)
2539 continue;
2540
2541 zero(s->child.siginfo);
2542 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2543 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2544 if (r < 0)
2545 return -errno;
2546
2547 if (s->child.siginfo.si_pid != 0) {
2548 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2549
2550 if (!zombie && (s->child.options & WEXITED)) {
2551 /* If the child isn't dead then let's
2552 * immediately remove the state change
2553 * from the queue, since there's no
2554 * benefit in leaving it queued */
2555
2556 assert(s->child.options & (WSTOPPED|WCONTINUED));
2557 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2558 }
2559
2560 r = source_set_pending(s, true);
2561 if (r < 0)
2562 return r;
2563 }
2564 }
2565
2566 return 0;
2567 }
2568
2569 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2570 bool read_one = false;
2571 int r;
2572
2573 assert(e);
2574 assert(d);
2575 assert_return(events == EPOLLIN, -EIO);
2576
2577 /* If there's a signal queued on this priority and SIGCHLD is
2578 on this priority too, then make sure to recheck the
2579 children we watch. This is because we only ever dequeue
2580 the first signal per priority, and if we dequeue one while
2581 a SIGCHLD is queued behind it we wouldn't notice it, even
2582 though we might have higher-priority children we care
2583 about. Hence we need to check for them explicitly. */
2584
2585 if (sigismember(&d->sigset, SIGCHLD))
2586 e->need_process_child = true;
2587
2588 /* If there's already an event source pending for this
2589 * priority we don't read another */
2590 if (d->current)
2591 return 0;
2592
2593 for (;;) {
2594 struct signalfd_siginfo si;
2595 ssize_t n;
2596 sd_event_source *s = NULL;
2597
2598 n = read(d->fd, &si, sizeof(si));
2599 if (n < 0) {
2600 if (IN_SET(errno, EAGAIN, EINTR))
2601 return read_one;
2602
2603 return -errno;
2604 }
2605
2606 if (_unlikely_(n != sizeof(si)))
2607 return -EIO;
2608
2609 assert(SIGNAL_VALID(si.ssi_signo));
2610
2611 read_one = true;
2612
2613 if (e->signal_sources)
2614 s = e->signal_sources[si.ssi_signo];
2615 if (!s)
2616 continue;
2617 if (s->pending)
2618 continue;
2619
2620 s->signal.siginfo = si;
2621 d->current = s;
2622
2623 r = source_set_pending(s, true);
2624 if (r < 0)
2625 return r;
2626
2627 return 1;
2628 }
2629 }
2630
2631 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2632 ssize_t n;
2633
2634 assert(e);
2635 assert(d);
2636
2637 assert_return(revents == EPOLLIN, -EIO);
2638
2639 /* If there's already an event source pending for this priority, don't read another */
2640 if (d->n_pending > 0)
2641 return 0;
2642
2643 /* Is the read buffer non-empty? If so, let's not read more */
2644 if (d->buffer_filled > 0)
2645 return 0;
2646
2647 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2648 if (n < 0) {
2649 if (IN_SET(errno, EAGAIN, EINTR))
2650 return 0;
2651
2652 return -errno;
2653 }
2654
2655 assert(n > 0);
2656 d->buffer_filled = (size_t) n;
2657 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2658
2659 return 1;
2660 }
2661
2662 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2663 assert(e);
2664 assert(d);
2665 assert(sz <= d->buffer_filled);
2666
2667 if (sz == 0)
2668 return;
2669
2670 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2671 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2672 d->buffer_filled -= sz;
2673
2674 if (d->buffer_filled == 0)
2675 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2676 }
2677
2678 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2679 int r;
2680
2681 assert(e);
2682 assert(d);
2683
2684 /* If there's already an event source pending for this priority, don't read another */
2685 if (d->n_pending > 0)
2686 return 0;
2687
2688 while (d->buffer_filled > 0) {
2689 size_t sz;
2690
2691 /* Let's validate that the event structures are complete */
2692 if (d->buffer_filled < offsetof(struct inotify_event, name))
2693 return -EIO;
2694
2695 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2696 if (d->buffer_filled < sz)
2697 return -EIO;
2698
2699 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2700 struct inode_data *inode_data;
2701 Iterator i;
2702
2703 /* The queue overran, let's pass this event to all event sources connected to this inotify
2704 * object */
2705
2706 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2707 sd_event_source *s;
2708
2709 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2710
2711 if (s->enabled == SD_EVENT_OFF)
2712 continue;
2713
2714 r = source_set_pending(s, true);
2715 if (r < 0)
2716 return r;
2717 }
2718 }
2719 } else {
2720 struct inode_data *inode_data;
2721 sd_event_source *s;
2722
2723 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2724 * our watch descriptor table. */
2725 if (d->buffer.ev.mask & IN_IGNORED) {
2726
2727 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2728 if (!inode_data) {
2729 event_inotify_data_drop(e, d, sz);
2730 continue;
2731 }
2732
2733 /* The watch descriptor was removed by the kernel, let's drop it here too */
2734 inode_data->wd = -1;
2735 } else {
2736 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2737 if (!inode_data) {
2738 event_inotify_data_drop(e, d, sz);
2739 continue;
2740 }
2741 }
2742
2743 /* Trigger all event sources that are interested in these events. Also trigger all event
2744 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2745 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2746
2747 if (s->enabled == SD_EVENT_OFF)
2748 continue;
2749
2750 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2751 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2752 continue;
2753
2754 r = source_set_pending(s, true);
2755 if (r < 0)
2756 return r;
2757 }
2758 }
2759
2760 /* Something pending now? If so, let's finish, otherwise let's read more. */
2761 if (d->n_pending > 0)
2762 return 1;
2763 }
2764
2765 return 0;
2766 }
2767
2768 static int process_inotify(sd_event *e) {
2769 struct inotify_data *d;
2770 int r, done = 0;
2771
2772 assert(e);
2773
2774 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2775 r = event_inotify_data_process(e, d);
2776 if (r < 0)
2777 return r;
2778 if (r > 0)
2779 done++;
2780 }
2781
2782 return done;
2783 }
2784
2785 static int source_dispatch(sd_event_source *s) {
2786 EventSourceType saved_type;
2787 int r = 0;
2788
2789 assert(s);
2790 assert(s->pending || s->type == SOURCE_EXIT);
2791
2792 /* Save the event source type, here, so that we still know it after the event callback which might invalidate
2793 * the event. */
2794 saved_type = s->type;
2795
2796 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2797 r = source_set_pending(s, false);
2798 if (r < 0)
2799 return r;
2800 }
2801
2802 if (s->type != SOURCE_POST) {
2803 sd_event_source *z;
2804 Iterator i;
2805
2806 /* If we execute a non-post source, let's mark all
2807 * post sources as pending */
2808
2809 SET_FOREACH(z, s->event->post_sources, i) {
2810 if (z->enabled == SD_EVENT_OFF)
2811 continue;
2812
2813 r = source_set_pending(z, true);
2814 if (r < 0)
2815 return r;
2816 }
2817 }
2818
2819 if (s->enabled == SD_EVENT_ONESHOT) {
2820 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2821 if (r < 0)
2822 return r;
2823 }
2824
2825 s->dispatching = true;
2826
2827 switch (s->type) {
2828
2829 case SOURCE_IO:
2830 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2831 break;
2832
2833 case SOURCE_TIME_REALTIME:
2834 case SOURCE_TIME_BOOTTIME:
2835 case SOURCE_TIME_MONOTONIC:
2836 case SOURCE_TIME_REALTIME_ALARM:
2837 case SOURCE_TIME_BOOTTIME_ALARM:
2838 r = s->time.callback(s, s->time.next, s->userdata);
2839 break;
2840
2841 case SOURCE_SIGNAL:
2842 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2843 break;
2844
2845 case SOURCE_CHILD: {
2846 bool zombie;
2847
2848 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2849
2850 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2851
2852 /* Now, reap the PID for good. */
2853 if (zombie)
2854 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2855
2856 break;
2857 }
2858
2859 case SOURCE_DEFER:
2860 r = s->defer.callback(s, s->userdata);
2861 break;
2862
2863 case SOURCE_POST:
2864 r = s->post.callback(s, s->userdata);
2865 break;
2866
2867 case SOURCE_EXIT:
2868 r = s->exit.callback(s, s->userdata);
2869 break;
2870
2871 case SOURCE_INOTIFY: {
2872 struct sd_event *e = s->event;
2873 struct inotify_data *d;
2874 size_t sz;
2875
2876 assert(s->inotify.inode_data);
2877 assert_se(d = s->inotify.inode_data->inotify_data);
2878
2879 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
2880 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2881 assert(d->buffer_filled >= sz);
2882
2883 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
2884
2885 /* When no event is pending on this inotify object anymore, let's drop the event from the
2886 * buffer. */
2887 if (d->n_pending == 0)
2888 event_inotify_data_drop(e, d, sz);
2889
2890 break;
2891 }
2892
2893 case SOURCE_WATCHDOG:
2894 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2895 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2896 assert_not_reached("Wut? I shouldn't exist.");
2897 }
2898
2899 s->dispatching = false;
2900
2901 if (r < 0)
2902 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2903 strna(s->description), event_source_type_to_string(saved_type));
2904
2905 if (s->n_ref == 0)
2906 source_free(s);
2907 else if (r < 0)
2908 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2909
2910 return 1;
2911 }
2912
2913 static int event_prepare(sd_event *e) {
2914 int r;
2915
2916 assert(e);
2917
2918 for (;;) {
2919 sd_event_source *s;
2920
2921 s = prioq_peek(e->prepare);
2922 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2923 break;
2924
2925 s->prepare_iteration = e->iteration;
2926 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2927 if (r < 0)
2928 return r;
2929
2930 assert(s->prepare);
2931
2932 s->dispatching = true;
2933 r = s->prepare(s, s->userdata);
2934 s->dispatching = false;
2935
2936 if (r < 0)
2937 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2938 strna(s->description), event_source_type_to_string(s->type));
2939
2940 if (s->n_ref == 0)
2941 source_free(s);
2942 else if (r < 0)
2943 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2944 }
2945
2946 return 0;
2947 }
2948
2949 static int dispatch_exit(sd_event *e) {
2950 sd_event_source *p;
2951 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2952 int r;
2953
2954 assert(e);
2955
2956 p = prioq_peek(e->exit);
2957 if (!p || p->enabled == SD_EVENT_OFF) {
2958 e->state = SD_EVENT_FINISHED;
2959 return 0;
2960 }
2961
2962 ref = sd_event_ref(e);
2963 e->iteration++;
2964 e->state = SD_EVENT_EXITING;
2965 r = source_dispatch(p);
2966 e->state = SD_EVENT_INITIAL;
2967 return r;
2968 }
2969
2970 static sd_event_source* event_next_pending(sd_event *e) {
2971 sd_event_source *p;
2972
2973 assert(e);
2974
2975 p = prioq_peek(e->pending);
2976 if (!p)
2977 return NULL;
2978
2979 if (p->enabled == SD_EVENT_OFF)
2980 return NULL;
2981
2982 return p;
2983 }
2984
2985 static int arm_watchdog(sd_event *e) {
2986 struct itimerspec its = {};
2987 usec_t t;
2988 int r;
2989
2990 assert(e);
2991 assert(e->watchdog_fd >= 0);
2992
2993 t = sleep_between(e,
2994 e->watchdog_last + (e->watchdog_period / 2),
2995 e->watchdog_last + (e->watchdog_period * 3 / 4));
2996
2997 timespec_store(&its.it_value, t);
2998
2999 /* Make sure we never set the watchdog to 0, which tells the
3000 * kernel to disable it. */
3001 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3002 its.it_value.tv_nsec = 1;
3003
3004 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3005 if (r < 0)
3006 return -errno;
3007
3008 return 0;
3009 }
3010
3011 static int process_watchdog(sd_event *e) {
3012 assert(e);
3013
3014 if (!e->watchdog)
3015 return 0;
3016
3017 /* Don't notify watchdog too often */
3018 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3019 return 0;
3020
3021 sd_notify(false, "WATCHDOG=1");
3022 e->watchdog_last = e->timestamp.monotonic;
3023
3024 return arm_watchdog(e);
3025 }
3026
3027 static void event_close_inode_data_fds(sd_event *e) {
3028 struct inode_data *d;
3029
3030 assert(e);
3031
3032 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3033 * filesystems. But we can't close them right away, as we need them as long as the user still wants to make
3034 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3035 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3036 * compromise. */
3037
3038 while ((d = e->inode_data_to_close)) {
3039 assert(d->fd >= 0);
3040 d->fd = safe_close(d->fd);
3041
3042 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3043 }
3044 }
3045
3046 _public_ int sd_event_prepare(sd_event *e) {
3047 int r;
3048
3049 assert_return(e, -EINVAL);
3050 assert_return(e = event_resolve(e), -ENOPKG);
3051 assert_return(!event_pid_changed(e), -ECHILD);
3052 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3053 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3054
3055 if (e->exit_requested)
3056 goto pending;
3057
3058 e->iteration++;
3059
3060 e->state = SD_EVENT_PREPARING;
3061 r = event_prepare(e);
3062 e->state = SD_EVENT_INITIAL;
3063 if (r < 0)
3064 return r;
3065
3066 r = event_arm_timer(e, &e->realtime);
3067 if (r < 0)
3068 return r;
3069
3070 r = event_arm_timer(e, &e->boottime);
3071 if (r < 0)
3072 return r;
3073
3074 r = event_arm_timer(e, &e->monotonic);
3075 if (r < 0)
3076 return r;
3077
3078 r = event_arm_timer(e, &e->realtime_alarm);
3079 if (r < 0)
3080 return r;
3081
3082 r = event_arm_timer(e, &e->boottime_alarm);
3083 if (r < 0)
3084 return r;
3085
3086 event_close_inode_data_fds(e);
3087
3088 if (event_next_pending(e) || e->need_process_child)
3089 goto pending;
3090
3091 e->state = SD_EVENT_ARMED;
3092
3093 return 0;
3094
3095 pending:
3096 e->state = SD_EVENT_ARMED;
3097 r = sd_event_wait(e, 0);
3098 if (r == 0)
3099 e->state = SD_EVENT_ARMED;
3100
3101 return r;
3102 }
3103
3104 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3105 struct epoll_event *ev_queue;
3106 unsigned ev_queue_max;
3107 int r, m, i;
3108
3109 assert_return(e, -EINVAL);
3110 assert_return(e = event_resolve(e), -ENOPKG);
3111 assert_return(!event_pid_changed(e), -ECHILD);
3112 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3113 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3114
3115 if (e->exit_requested) {
3116 e->state = SD_EVENT_PENDING;
3117 return 1;
3118 }
3119
3120 ev_queue_max = MAX(e->n_sources, 1u);
3121 ev_queue = newa(struct epoll_event, ev_queue_max);
3122
3123 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3124 if (e->inotify_data_buffered)
3125 timeout = 0;
3126
3127 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3128 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3129 if (m < 0) {
3130 if (errno == EINTR) {
3131 e->state = SD_EVENT_PENDING;
3132 return 1;
3133 }
3134
3135 r = -errno;
3136 goto finish;
3137 }
3138
3139 triple_timestamp_get(&e->timestamp);
3140
3141 for (i = 0; i < m; i++) {
3142
3143 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3144 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3145 else {
3146 WakeupType *t = ev_queue[i].data.ptr;
3147
3148 switch (*t) {
3149
3150 case WAKEUP_EVENT_SOURCE:
3151 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3152 break;
3153
3154 case WAKEUP_CLOCK_DATA: {
3155 struct clock_data *d = ev_queue[i].data.ptr;
3156 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3157 break;
3158 }
3159
3160 case WAKEUP_SIGNAL_DATA:
3161 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3162 break;
3163
3164 case WAKEUP_INOTIFY_DATA:
3165 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3166 break;
3167
3168 default:
3169 assert_not_reached("Invalid wake-up pointer");
3170 }
3171 }
3172 if (r < 0)
3173 goto finish;
3174 }
3175
3176 r = process_watchdog(e);
3177 if (r < 0)
3178 goto finish;
3179
3180 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3181 if (r < 0)
3182 goto finish;
3183
3184 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3185 if (r < 0)
3186 goto finish;
3187
3188 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3189 if (r < 0)
3190 goto finish;
3191
3192 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3193 if (r < 0)
3194 goto finish;
3195
3196 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3197 if (r < 0)
3198 goto finish;
3199
3200 if (e->need_process_child) {
3201 r = process_child(e);
3202 if (r < 0)
3203 goto finish;
3204 }
3205
3206 r = process_inotify(e);
3207 if (r < 0)
3208 goto finish;
3209
3210 if (event_next_pending(e)) {
3211 e->state = SD_EVENT_PENDING;
3212
3213 return 1;
3214 }
3215
3216 r = 0;
3217
3218 finish:
3219 e->state = SD_EVENT_INITIAL;
3220
3221 return r;
3222 }
3223
3224 _public_ int sd_event_dispatch(sd_event *e) {
3225 sd_event_source *p;
3226 int r;
3227
3228 assert_return(e, -EINVAL);
3229 assert_return(e = event_resolve(e), -ENOPKG);
3230 assert_return(!event_pid_changed(e), -ECHILD);
3231 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3232 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3233
3234 if (e->exit_requested)
3235 return dispatch_exit(e);
3236
3237 p = event_next_pending(e);
3238 if (p) {
3239 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3240
3241 ref = sd_event_ref(e);
3242 e->state = SD_EVENT_RUNNING;
3243 r = source_dispatch(p);
3244 e->state = SD_EVENT_INITIAL;
3245 return r;
3246 }
3247
3248 e->state = SD_EVENT_INITIAL;
3249
3250 return 1;
3251 }
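
/*
 * A rough sketch, for illustration only, of how an external loop might drive the
 * prepare/wait/dispatch cycle above by hand instead of calling sd_event_run()/sd_event_loop()
 * (error handling abbreviated):
 *
 *     for (;;) {
 *             r = sd_event_prepare(e);
 *             if (r == 0)
 *                     r = sd_event_wait(e, (uint64_t) -1);
 *             if (r > 0)
 *                     r = sd_event_dispatch(e);
 *             if (r < 0)
 *                     break;
 *             if (sd_event_get_state(e) == SD_EVENT_FINISHED)
 *                     break;
 *     }
 */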
3252
3253 static void event_log_delays(sd_event *e) {
3254 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3255 size_t l, i;
3256
3257 p = b;
3258 l = sizeof(b);
3259 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3260 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3261 e->delays[i] = 0;
3262 }
3263 log_debug("Event loop iterations: %s", b);
3264 }
3265
3266 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3267 int r;
3268
3269 assert_return(e, -EINVAL);
3270 assert_return(e = event_resolve(e), -ENOPKG);
3271 assert_return(!event_pid_changed(e), -ECHILD);
3272 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3273 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3274
3275 if (e->profile_delays && e->last_run) {
3276 usec_t this_run;
3277 unsigned l;
3278
3279 this_run = now(CLOCK_MONOTONIC);
3280
3281 l = u64log2(this_run - e->last_run);
3282 assert(l < sizeof(e->delays));
3283 e->delays[l]++;
3284
3285 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3286 event_log_delays(e);
3287 e->last_log = this_run;
3288 }
3289 }
3290
3291 r = sd_event_prepare(e);
3292 if (r == 0)
3293 /* There was nothing? Then wait... */
3294 r = sd_event_wait(e, timeout);
3295
3296 if (e->profile_delays)
3297 e->last_run = now(CLOCK_MONOTONIC);
3298
3299 if (r > 0) {
3300 /* There's something now, so let's dispatch it */
3301 r = sd_event_dispatch(e);
3302 if (r < 0)
3303 return r;
3304
3305 return 1;
3306 }
3307
3308 return r;
3309 }
3310
3311 _public_ int sd_event_loop(sd_event *e) {
3312 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3313 int r;
3314
3315 assert_return(e, -EINVAL);
3316 assert_return(e = event_resolve(e), -ENOPKG);
3317 assert_return(!event_pid_changed(e), -ECHILD);
3318 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3319
3320 ref = sd_event_ref(e);
3321
3322 while (e->state != SD_EVENT_FINISHED) {
3323 r = sd_event_run(e, (uint64_t) -1);
3324 if (r < 0)
3325 return r;
3326 }
3327
3328 return e->exit_code;
3329 }
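
/*
 * A minimal end-to-end sketch (illustrative only; on_timer is a hypothetical callback that
 * eventually calls sd_event_exit()) of running this loop from an application:
 *
 *     sd_event *e = NULL;
 *     sd_event_source *timer = NULL;
 *     uint64_t usec;
 *
 *     (void) sd_event_default(&e);
 *     (void) sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     (void) sd_event_add_time(e, &timer, CLOCK_MONOTONIC, usec + 5 * USEC_PER_SEC, 0, on_timer, NULL);
 *     (void) sd_event_loop(e);        // returns the code passed to sd_event_exit()
 *
 *     timer = sd_event_source_unref(timer);
 *     e = sd_event_unref(e);
 */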
3330
3331 _public_ int sd_event_get_fd(sd_event *e) {
3332
3333 assert_return(e, -EINVAL);
3334 assert_return(e = event_resolve(e), -ENOPKG);
3335 assert_return(!event_pid_changed(e), -ECHILD);
3336
3337 return e->epoll_fd;
3338 }
3339
3340 _public_ int sd_event_get_state(sd_event *e) {
3341 assert_return(e, -EINVAL);
3342 assert_return(e = event_resolve(e), -ENOPKG);
3343 assert_return(!event_pid_changed(e), -ECHILD);
3344
3345 return e->state;
3346 }
3347
3348 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3349 assert_return(e, -EINVAL);
3350 assert_return(e = event_resolve(e), -ENOPKG);
3351 assert_return(code, -EINVAL);
3352 assert_return(!event_pid_changed(e), -ECHILD);
3353
3354 if (!e->exit_requested)
3355 return -ENODATA;
3356
3357 *code = e->exit_code;
3358 return 0;
3359 }
3360
3361 _public_ int sd_event_exit(sd_event *e, int code) {
3362 assert_return(e, -EINVAL);
3363 assert_return(e = event_resolve(e), -ENOPKG);
3364 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3365 assert_return(!event_pid_changed(e), -ECHILD);
3366
3367 e->exit_requested = true;
3368 e->exit_code = code;
3369
3370 return 0;
3371 }
3372
3373 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3374 assert_return(e, -EINVAL);
3375 assert_return(e = event_resolve(e), -ENOPKG);
3376 assert_return(usec, -EINVAL);
3377 assert_return(!event_pid_changed(e), -ECHILD);
3378
3379 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3380 return -EOPNOTSUPP;
3381
3382 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3383 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3384 * the purpose of getting the time this doesn't matter. */
3385 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3386 return -EOPNOTSUPP;
3387
3388 if (!triple_timestamp_is_set(&e->timestamp)) {
3389 /* Implicitly fall back to now() if we never ran
3390 * before and thus have no cached time. */
3391 *usec = now(clock);
3392 return 1;
3393 }
3394
3395 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3396 return 0;
3397 }
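
/*
 * Illustrative pattern: using the cached loop timestamp as the base for relative timers, so
 * every source scheduled during one iteration measures from the same instant (on_timeout is a
 * hypothetical callback):
 *
 *     uint64_t usec;
 *     (void) sd_event_now(e, CLOCK_MONOTONIC, &usec);
 *     (void) sd_event_add_time(e, NULL, CLOCK_MONOTONIC, usec + 30 * USEC_PER_SEC, 0, on_timeout, NULL);
 */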
3398
3399 _public_ int sd_event_default(sd_event **ret) {
3400 sd_event *e = NULL;
3401 int r;
3402
3403 if (!ret)
3404 return !!default_event;
3405
3406 if (default_event) {
3407 *ret = sd_event_ref(default_event);
3408 return 0;
3409 }
3410
3411 r = sd_event_new(&e);
3412 if (r < 0)
3413 return r;
3414
3415 e->default_event_ptr = &default_event;
3416 e->tid = gettid();
3417 default_event = e;
3418
3419 *ret = e;
3420 return 1;
3421 }
3422
3423 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3424 assert_return(e, -EINVAL);
3425 assert_return(e = event_resolve(e), -ENOPKG);
3426 assert_return(tid, -EINVAL);
3427 assert_return(!event_pid_changed(e), -ECHILD);
3428
3429 if (e->tid != 0) {
3430 *tid = e->tid;
3431 return 0;
3432 }
3433
3434 return -ENXIO;
3435 }
3436
3437 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3438 int r;
3439
3440 assert_return(e, -EINVAL);
3441 assert_return(e = event_resolve(e), -ENOPKG);
3442 assert_return(!event_pid_changed(e), -ECHILD);
3443
3444 if (e->watchdog == !!b)
3445 return e->watchdog;
3446
3447 if (b) {
3448 struct epoll_event ev;
3449
3450 r = sd_watchdog_enabled(false, &e->watchdog_period);
3451 if (r <= 0)
3452 return r;
3453
3454 /* Issue first ping immediately */
3455 sd_notify(false, "WATCHDOG=1");
3456 e->watchdog_last = now(CLOCK_MONOTONIC);
3457
3458 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3459 if (e->watchdog_fd < 0)
3460 return -errno;
3461
3462 r = arm_watchdog(e);
3463 if (r < 0)
3464 goto fail;
3465
3466 ev = (struct epoll_event) {
3467 .events = EPOLLIN,
3468 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3469 };
3470
3471 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3472 if (r < 0) {
3473 r = -errno;
3474 goto fail;
3475 }
3476
3477 } else {
3478 if (e->watchdog_fd >= 0) {
3479 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3480 e->watchdog_fd = safe_close(e->watchdog_fd);
3481 }
3482 }
3483
3484 e->watchdog = !!b;
3485 return e->watchdog;
3486
3487 fail:
3488 e->watchdog_fd = safe_close(e->watchdog_fd);
3489 return r;
3490 }
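
/*
 * Illustrative note: for a service started with WatchdogSec= (i.e. with WATCHDOG_USEC set in
 * its environment), a single call is enough to have the loop send keep-alive pings at the
 * cadence computed by arm_watchdog() above:
 *
 *     (void) sd_event_set_watchdog(e, true);
 */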
3491
3492 _public_ int sd_event_get_watchdog(sd_event *e) {
3493 assert_return(e, -EINVAL);
3494 assert_return(e = event_resolve(e), -ENOPKG);
3495 assert_return(!event_pid_changed(e), -ECHILD);
3496
3497 return e->watchdog;
3498 }
3499
3500 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3501 assert_return(e, -EINVAL);
3502 assert_return(e = event_resolve(e), -ENOPKG);
3503 assert_return(!event_pid_changed(e), -ECHILD);
3504
3505 *ret = e->iteration;
3506 return 0;
3507 }
3508
3509 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3510 assert_return(s, -EINVAL);
3511
3512 s->destroy_callback = callback;
3513 return 0;
3514 }
3515
3516 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3517 assert_return(s, -EINVAL);
3518
3519 if (ret)
3520 *ret = s->destroy_callback;
3521
3522 return !!s->destroy_callback;
3523 }
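
/*
 * Illustrative use of the destroy callback set above: when the userdata is heap-allocated and
 * owned by the source, freeing it can be delegated to the destroy callback (ctx is a
 * hypothetical allocation):
 *
 *     (void) sd_event_source_set_userdata(s, ctx);
 *     (void) sd_event_source_set_destroy_callback(s, free);
 */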
3524
3525 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3526 assert_return(s, -EINVAL);
3527
3528 return s->floating;
3529 }
3530
3531 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3532 assert_return(s, -EINVAL);
3533
3534 if (s->floating == !!b)
3535 return 0;
3536
3537 if (!s->event) /* Already disconnected */
3538 return -ESTALE;
3539
3540 s->floating = b;
3541
3542 if (b) {
3543 sd_event_source_ref(s);
3544 sd_event_unref(s->event);
3545 } else {
3546 sd_event_ref(s->event);
3547 sd_event_source_unref(s);
3548 }
3549
3550 return 1;
3551 }
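
/*
 * Illustrative note on the reference flip implemented above: a floating source is kept alive
 * by the event loop rather than keeping the loop alive itself, which suits fire-and-forget
 * sources the caller does not want to track:
 *
 *     (void) sd_event_source_set_floating(s, true);
 *     s = sd_event_source_unref(s);   // the loop now holds the remaining reference
 */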