src/libsystemd/sd-event/sd-event.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "event-source.h"
13 #include "fd-util.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "list.h"
17 #include "macro.h"
18 #include "memory-util.h"
19 #include "missing.h"
20 #include "prioq.h"
21 #include "process-util.h"
22 #include "set.h"
23 #include "signal-util.h"
24 #include "string-table.h"
25 #include "string-util.h"
26 #include "strxcpyx.h"
27 #include "time-util.h"
28
29 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
30
31 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
32 [SOURCE_IO] = "io",
33 [SOURCE_TIME_REALTIME] = "realtime",
34 [SOURCE_TIME_BOOTTIME] = "boottime",
35 [SOURCE_TIME_MONOTONIC] = "monotonic",
36 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
37 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
38 [SOURCE_SIGNAL] = "signal",
39 [SOURCE_CHILD] = "child",
40 [SOURCE_DEFER] = "defer",
41 [SOURCE_POST] = "post",
42 [SOURCE_EXIT] = "exit",
43 [SOURCE_WATCHDOG] = "watchdog",
44 [SOURCE_INOTIFY] = "inotify",
45 };
46
47 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
48
49 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
50
51 struct sd_event {
52 unsigned n_ref;
53
54 int epoll_fd;
55 int watchdog_fd;
56
57 Prioq *pending;
58 Prioq *prepare;
59
60 /* timerfd_create() only supports these five clocks so far. We
61 * can add support for more clocks when the kernel learns to
62 * deal with them, too. */
63 struct clock_data realtime;
64 struct clock_data boottime;
65 struct clock_data monotonic;
66 struct clock_data realtime_alarm;
67 struct clock_data boottime_alarm;
68
69 usec_t perturb;
70
71 sd_event_source **signal_sources; /* indexed by signal number */
72 Hashmap *signal_data; /* indexed by priority */
73
74 Hashmap *child_sources;
75 unsigned n_enabled_child_sources;
76
77 Set *post_sources;
78
79 Prioq *exit;
80
81 Hashmap *inotify_data; /* indexed by priority */
82
83 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
84 LIST_HEAD(struct inode_data, inode_data_to_close);
85
86 /* A list of inotify objects that already have events buffered which aren't processed yet */
87 LIST_HEAD(struct inotify_data, inotify_data_buffered);
88
89 pid_t original_pid;
90
91 uint64_t iteration;
92 triple_timestamp timestamp;
93 int state;
94
95 bool exit_requested:1;
96 bool need_process_child:1;
97 bool watchdog:1;
98 bool profile_delays:1;
99
100 int exit_code;
101
102 pid_t tid;
103 sd_event **default_event_ptr;
104
105 usec_t watchdog_last, watchdog_period;
106
107 unsigned n_sources;
108
109 LIST_HEAD(sd_event_source, sources);
110
111 usec_t last_run, last_log;
112 unsigned delays[sizeof(usec_t) * 8];
113 };
114
115 static thread_local sd_event *default_event = NULL;
116
117 static void source_disconnect(sd_event_source *s);
118 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
119
120 static sd_event *event_resolve(sd_event *e) {
121 return e == SD_EVENT_DEFAULT ? default_event : e;
122 }
123
124 static int pending_prioq_compare(const void *a, const void *b) {
125 const sd_event_source *x = a, *y = b;
126 int r;
127
128 assert(x->pending);
129 assert(y->pending);
130
131 /* Enabled ones first */
132 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
133 return -1;
134 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
135 return 1;
136
137 /* Lower priority values first */
138 r = CMP(x->priority, y->priority);
139 if (r != 0)
140 return r;
141
142 /* Older entries first */
143 return CMP(x->pending_iteration, y->pending_iteration);
144 }
145
146 static int prepare_prioq_compare(const void *a, const void *b) {
147 const sd_event_source *x = a, *y = b;
148 int r;
149
150 assert(x->prepare);
151 assert(y->prepare);
152
153 /* Enabled ones first */
154 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
155 return -1;
156 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
157 return 1;
158
159 /* Move most recently prepared ones last, so that we can stop
160 * preparing as soon as we hit one that has already been
161 * prepared in the current iteration */
162 r = CMP(x->prepare_iteration, y->prepare_iteration);
163 if (r != 0)
164 return r;
165
166 /* Lower priority values first */
167 return CMP(x->priority, y->priority);
168 }
169
170 static int earliest_time_prioq_compare(const void *a, const void *b) {
171 const sd_event_source *x = a, *y = b;
172
173 assert(EVENT_SOURCE_IS_TIME(x->type));
174 assert(x->type == y->type);
175
176 /* Enabled ones first */
177 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
178 return -1;
179 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
180 return 1;
181
182 /* Move the pending ones to the end */
183 if (!x->pending && y->pending)
184 return -1;
185 if (x->pending && !y->pending)
186 return 1;
187
188 /* Order by time */
189 return CMP(x->time.next, y->time.next);
190 }
191
192 static usec_t time_event_source_latest(const sd_event_source *s) {
193 return usec_add(s->time.next, s->time.accuracy);
194 }
195
196 static int latest_time_prioq_compare(const void *a, const void *b) {
197 const sd_event_source *x = a, *y = b;
198
199 assert(EVENT_SOURCE_IS_TIME(x->type));
200 assert(x->type == y->type);
201
202 /* Enabled ones first */
203 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
204 return -1;
205 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
206 return 1;
207
208 /* Move the pending ones to the end */
209 if (!x->pending && y->pending)
210 return -1;
211 if (x->pending && !y->pending)
212 return 1;
213
214 /* Order by time */
215 return CMP(time_event_source_latest(x), time_event_source_latest(y));
216 }
217
218 static int exit_prioq_compare(const void *a, const void *b) {
219 const sd_event_source *x = a, *y = b;
220
221 assert(x->type == SOURCE_EXIT);
222 assert(y->type == SOURCE_EXIT);
223
224 /* Enabled ones first */
225 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
226 return -1;
227 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
228 return 1;
229
230 /* Lower priority values first */
231 return CMP(x->priority, y->priority);
232 }
233
234 static void free_clock_data(struct clock_data *d) {
235 assert(d);
236 assert(d->wakeup == WAKEUP_CLOCK_DATA);
237
238 safe_close(d->fd);
239 prioq_free(d->earliest);
240 prioq_free(d->latest);
241 }
242
243 static sd_event *event_free(sd_event *e) {
244 sd_event_source *s;
245
246 assert(e);
247
248 while ((s = e->sources)) {
249 assert(s->floating);
250 source_disconnect(s);
251 sd_event_source_unref(s);
252 }
253
254 assert(e->n_sources == 0);
255
256 if (e->default_event_ptr)
257 *(e->default_event_ptr) = NULL;
258
259 safe_close(e->epoll_fd);
260 safe_close(e->watchdog_fd);
261
262 free_clock_data(&e->realtime);
263 free_clock_data(&e->boottime);
264 free_clock_data(&e->monotonic);
265 free_clock_data(&e->realtime_alarm);
266 free_clock_data(&e->boottime_alarm);
267
268 prioq_free(e->pending);
269 prioq_free(e->prepare);
270 prioq_free(e->exit);
271
272 free(e->signal_sources);
273 hashmap_free(e->signal_data);
274
275 hashmap_free(e->inotify_data);
276
277 hashmap_free(e->child_sources);
278 set_free(e->post_sources);
279
280 return mfree(e);
281 }
282
283 _public_ int sd_event_new(sd_event** ret) {
284 sd_event *e;
285 int r;
286
287 assert_return(ret, -EINVAL);
288
289 e = new(sd_event, 1);
290 if (!e)
291 return -ENOMEM;
292
293 *e = (sd_event) {
294 .n_ref = 1,
295 .epoll_fd = -1,
296 .watchdog_fd = -1,
297 .realtime.wakeup = WAKEUP_CLOCK_DATA,
298 .realtime.fd = -1,
299 .realtime.next = USEC_INFINITY,
300 .boottime.wakeup = WAKEUP_CLOCK_DATA,
301 .boottime.fd = -1,
302 .boottime.next = USEC_INFINITY,
303 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
304 .monotonic.fd = -1,
305 .monotonic.next = USEC_INFINITY,
306 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
307 .realtime_alarm.fd = -1,
308 .realtime_alarm.next = USEC_INFINITY,
309 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
310 .boottime_alarm.fd = -1,
311 .boottime_alarm.next = USEC_INFINITY,
312 .perturb = USEC_INFINITY,
313 .original_pid = getpid_cached(),
314 };
315
316 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
317 if (r < 0)
318 goto fail;
319
320 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
321 if (e->epoll_fd < 0) {
322 r = -errno;
323 goto fail;
324 }
325
326 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
327
328 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
329 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
330 e->profile_delays = true;
331 }
332
333 *ret = e;
334 return 0;
335
336 fail:
337 event_free(e);
338 return r;
339 }
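/* A minimal allocation/teardown sketch for the constructor above (illustrative only, not part of this
 * file's logic; sd_event_loop() and sd_event_default() are public API implemented elsewhere in sd-event):
 *
 *         sd_event *e = NULL;
 *         int r;
 *
 *         r = sd_event_new(&e);          // or sd_event_default(&e) for the thread-local default instance
 *         if (r < 0)
 *                 return r;
 *
 *         // ... attach sources with sd_event_add_io(), sd_event_add_time(), ...
 *
 *         r = sd_event_loop(e);          // dispatch sources until sd_event_exit() is called
 *         sd_event_unref(e);
 *         return r;
 */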
340
341 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
342
343 _public_ sd_event_source* sd_event_source_disable_unref(sd_event_source *s) {
344 if (s)
345 (void) sd_event_source_set_enabled(s, SD_EVENT_OFF);
346 return sd_event_source_unref(s);
347 }
348
349 static bool event_pid_changed(sd_event *e) {
350 assert(e);
351
352 /* We don't support people creating an event loop and keeping
353 * it around over a fork(). Let's complain. */
354
355 return e->original_pid != getpid_cached();
356 }
357
358 static void source_io_unregister(sd_event_source *s) {
359 int r;
360
361 assert(s);
362 assert(s->type == SOURCE_IO);
363
364 if (event_pid_changed(s->event))
365 return;
366
367 if (!s->io.registered)
368 return;
369
370 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
371 if (r < 0)
372 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
373 strna(s->description), event_source_type_to_string(s->type));
374
375 s->io.registered = false;
376 }
377
378 static int source_io_register(
379 sd_event_source *s,
380 int enabled,
381 uint32_t events) {
382
383 struct epoll_event ev;
384 int r;
385
386 assert(s);
387 assert(s->type == SOURCE_IO);
388 assert(enabled != SD_EVENT_OFF);
389
390 ev = (struct epoll_event) {
391 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
392 .data.ptr = s,
393 };
394
395 if (s->io.registered)
396 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
397 else
398 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
399 if (r < 0)
400 return -errno;
401
402 s->io.registered = true;
403
404 return 0;
405 }
406
407 static clockid_t event_source_type_to_clock(EventSourceType t) {
408
409 switch (t) {
410
411 case SOURCE_TIME_REALTIME:
412 return CLOCK_REALTIME;
413
414 case SOURCE_TIME_BOOTTIME:
415 return CLOCK_BOOTTIME;
416
417 case SOURCE_TIME_MONOTONIC:
418 return CLOCK_MONOTONIC;
419
420 case SOURCE_TIME_REALTIME_ALARM:
421 return CLOCK_REALTIME_ALARM;
422
423 case SOURCE_TIME_BOOTTIME_ALARM:
424 return CLOCK_BOOTTIME_ALARM;
425
426 default:
427 return (clockid_t) -1;
428 }
429 }
430
431 static EventSourceType clock_to_event_source_type(clockid_t clock) {
432
433 switch (clock) {
434
435 case CLOCK_REALTIME:
436 return SOURCE_TIME_REALTIME;
437
438 case CLOCK_BOOTTIME:
439 return SOURCE_TIME_BOOTTIME;
440
441 case CLOCK_MONOTONIC:
442 return SOURCE_TIME_MONOTONIC;
443
444 case CLOCK_REALTIME_ALARM:
445 return SOURCE_TIME_REALTIME_ALARM;
446
447 case CLOCK_BOOTTIME_ALARM:
448 return SOURCE_TIME_BOOTTIME_ALARM;
449
450 default:
451 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
452 }
453 }
454
455 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
456 assert(e);
457
458 switch (t) {
459
460 case SOURCE_TIME_REALTIME:
461 return &e->realtime;
462
463 case SOURCE_TIME_BOOTTIME:
464 return &e->boottime;
465
466 case SOURCE_TIME_MONOTONIC:
467 return &e->monotonic;
468
469 case SOURCE_TIME_REALTIME_ALARM:
470 return &e->realtime_alarm;
471
472 case SOURCE_TIME_BOOTTIME_ALARM:
473 return &e->boottime_alarm;
474
475 default:
476 return NULL;
477 }
478 }
479
480 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
481 assert(e);
482
483 if (!d)
484 return;
485
486 hashmap_remove(e->signal_data, &d->priority);
487 safe_close(d->fd);
488 free(d);
489 }
490
491 static int event_make_signal_data(
492 sd_event *e,
493 int sig,
494 struct signal_data **ret) {
495
496 struct epoll_event ev;
497 struct signal_data *d;
498 bool added = false;
499 sigset_t ss_copy;
500 int64_t priority;
501 int r;
502
503 assert(e);
504
505 if (event_pid_changed(e))
506 return -ECHILD;
507
508 if (e->signal_sources && e->signal_sources[sig])
509 priority = e->signal_sources[sig]->priority;
510 else
511 priority = SD_EVENT_PRIORITY_NORMAL;
512
513 d = hashmap_get(e->signal_data, &priority);
514 if (d) {
515 if (sigismember(&d->sigset, sig) > 0) {
516 if (ret)
517 *ret = d;
518 return 0;
519 }
520 } else {
521 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
522 if (r < 0)
523 return r;
524
525 d = new(struct signal_data, 1);
526 if (!d)
527 return -ENOMEM;
528
529 *d = (struct signal_data) {
530 .wakeup = WAKEUP_SIGNAL_DATA,
531 .fd = -1,
532 .priority = priority,
533 };
534
535 r = hashmap_put(e->signal_data, &d->priority, d);
536 if (r < 0) {
537 free(d);
538 return r;
539 }
540
541 added = true;
542 }
543
544 ss_copy = d->sigset;
545 assert_se(sigaddset(&ss_copy, sig) >= 0);
546
547 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
548 if (r < 0) {
549 r = -errno;
550 goto fail;
551 }
552
553 d->sigset = ss_copy;
554
555 if (d->fd >= 0) {
556 if (ret)
557 *ret = d;
558 return 0;
559 }
560
561 d->fd = fd_move_above_stdio(r);
562
563 ev = (struct epoll_event) {
564 .events = EPOLLIN,
565 .data.ptr = d,
566 };
567
568 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
569 if (r < 0) {
570 r = -errno;
571 goto fail;
572 }
573
574 if (ret)
575 *ret = d;
576
577 return 0;
578
579 fail:
580 if (added)
581 event_free_signal_data(e, d);
582
583 return r;
584 }
585
586 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
587 assert(e);
588 assert(d);
589
590 /* Turns off the specified signal in the signal data
591 * object. If the signal mask of the object becomes empty
592 * that way, the object is removed as well. */
593
594 if (sigismember(&d->sigset, sig) == 0)
595 return;
596
597 assert_se(sigdelset(&d->sigset, sig) >= 0);
598
599 if (sigisemptyset(&d->sigset)) {
600 /* If the mask is now all-zero we can get rid of the structure */
601 event_free_signal_data(e, d);
602 return;
603 }
604
605 assert(d->fd >= 0);
606
607 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
608 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
609 }
610
611 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
612 struct signal_data *d;
613 static const int64_t zero_priority = 0;
614
615 assert(e);
616
617 /* Rechecks if the specified signal is still something we are
618 * interested in. If not, we'll unmask it, and possibly drop
619 * the signalfd for it. */
620
621 if (sig == SIGCHLD &&
622 e->n_enabled_child_sources > 0)
623 return;
624
625 if (e->signal_sources &&
626 e->signal_sources[sig] &&
627 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
628 return;
629
630 /*
631 * The specified signal might be enabled in three different queues:
632 *
633 * 1) the one that belongs to the priority passed (if it is non-NULL)
634 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
635 * 3) the 0 priority (to cover the SIGCHLD case)
636 *
637 * Hence, let's remove it from all three here.
638 */
639
640 if (priority) {
641 d = hashmap_get(e->signal_data, priority);
642 if (d)
643 event_unmask_signal_data(e, d, sig);
644 }
645
646 if (e->signal_sources && e->signal_sources[sig]) {
647 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
648 if (d)
649 event_unmask_signal_data(e, d, sig);
650 }
651
652 d = hashmap_get(e->signal_data, &zero_priority);
653 if (d)
654 event_unmask_signal_data(e, d, sig);
655 }
656
657 static void source_disconnect(sd_event_source *s) {
658 sd_event *event;
659
660 assert(s);
661
662 if (!s->event)
663 return;
664
665 assert(s->event->n_sources > 0);
666
667 switch (s->type) {
668
669 case SOURCE_IO:
670 if (s->io.fd >= 0)
671 source_io_unregister(s);
672
673 break;
674
675 case SOURCE_TIME_REALTIME:
676 case SOURCE_TIME_BOOTTIME:
677 case SOURCE_TIME_MONOTONIC:
678 case SOURCE_TIME_REALTIME_ALARM:
679 case SOURCE_TIME_BOOTTIME_ALARM: {
680 struct clock_data *d;
681
682 d = event_get_clock_data(s->event, s->type);
683 assert(d);
684
685 prioq_remove(d->earliest, s, &s->time.earliest_index);
686 prioq_remove(d->latest, s, &s->time.latest_index);
687 d->needs_rearm = true;
688 break;
689 }
690
691 case SOURCE_SIGNAL:
692 if (s->signal.sig > 0) {
693
694 if (s->event->signal_sources)
695 s->event->signal_sources[s->signal.sig] = NULL;
696
697 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
698 }
699
700 break;
701
702 case SOURCE_CHILD:
703 if (s->child.pid > 0) {
704 if (s->enabled != SD_EVENT_OFF) {
705 assert(s->event->n_enabled_child_sources > 0);
706 s->event->n_enabled_child_sources--;
707 }
708
709 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
710 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
711 }
712
713 break;
714
715 case SOURCE_DEFER:
716 /* nothing */
717 break;
718
719 case SOURCE_POST:
720 set_remove(s->event->post_sources, s);
721 break;
722
723 case SOURCE_EXIT:
724 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
725 break;
726
727 case SOURCE_INOTIFY: {
728 struct inode_data *inode_data;
729
730 inode_data = s->inotify.inode_data;
731 if (inode_data) {
732 struct inotify_data *inotify_data;
733 assert_se(inotify_data = inode_data->inotify_data);
734
735 /* Detach this event source from the inode object */
736 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
737 s->inotify.inode_data = NULL;
738
739 if (s->pending) {
740 assert(inotify_data->n_pending > 0);
741 inotify_data->n_pending--;
742 }
743
744 /* Note that we don't reduce the inotify mask of the watch descriptor here if the inode
745 * continues to be watched. That's because inotify doesn't really have an API for that: we
746 * can only change watch masks with access to the original inode either by fd or by path. But
747 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
748 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
749 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
750 * there), but given the need for open_by_handle_at() which is privileged and not universally
751 * available this would be quite an incomplete solution. Hence we go the other way, leave the
752 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
753 * anymore after reception. Yes, this sucks, but … Linux … */
754
755 /* Maybe release the inode data (and its inotify) */
756 event_gc_inode_data(s->event, inode_data);
757 }
758
759 break;
760 }
761
762 default:
763 assert_not_reached("Wut? I shouldn't exist.");
764 }
765
766 if (s->pending)
767 prioq_remove(s->event->pending, s, &s->pending_index);
768
769 if (s->prepare)
770 prioq_remove(s->event->prepare, s, &s->prepare_index);
771
772 event = s->event;
773
774 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
775 s->event = NULL;
776 LIST_REMOVE(sources, event->sources, s);
777 event->n_sources--;
778
779 if (!s->floating)
780 sd_event_unref(event);
781 }
782
783 static void source_free(sd_event_source *s) {
784 assert(s);
785
786 source_disconnect(s);
787
788 if (s->type == SOURCE_IO && s->io.owned)
789 s->io.fd = safe_close(s->io.fd);
790
791 if (s->destroy_callback)
792 s->destroy_callback(s->userdata);
793
794 free(s->description);
795 free(s);
796 }
797 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
798
799 static int source_set_pending(sd_event_source *s, bool b) {
800 int r;
801
802 assert(s);
803 assert(s->type != SOURCE_EXIT);
804
805 if (s->pending == b)
806 return 0;
807
808 s->pending = b;
809
810 if (b) {
811 s->pending_iteration = s->event->iteration;
812
813 r = prioq_put(s->event->pending, s, &s->pending_index);
814 if (r < 0) {
815 s->pending = false;
816 return r;
817 }
818 } else
819 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
820
821 if (EVENT_SOURCE_IS_TIME(s->type)) {
822 struct clock_data *d;
823
824 d = event_get_clock_data(s->event, s->type);
825 assert(d);
826
827 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
828 prioq_reshuffle(d->latest, s, &s->time.latest_index);
829 d->needs_rearm = true;
830 }
831
832 if (s->type == SOURCE_SIGNAL && !b) {
833 struct signal_data *d;
834
835 d = hashmap_get(s->event->signal_data, &s->priority);
836 if (d && d->current == s)
837 d->current = NULL;
838 }
839
840 if (s->type == SOURCE_INOTIFY) {
841
842 assert(s->inotify.inode_data);
843 assert(s->inotify.inode_data->inotify_data);
844
845 if (b)
846 s->inotify.inode_data->inotify_data->n_pending++;
847 else {
848 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
849 s->inotify.inode_data->inotify_data->n_pending--;
850 }
851 }
852
853 return 0;
854 }
855
856 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
857 sd_event_source *s;
858
859 assert(e);
860
861 s = new(sd_event_source, 1);
862 if (!s)
863 return NULL;
864
865 *s = (struct sd_event_source) {
866 .n_ref = 1,
867 .event = e,
868 .floating = floating,
869 .type = type,
870 .pending_index = PRIOQ_IDX_NULL,
871 .prepare_index = PRIOQ_IDX_NULL,
872 };
873
874 if (!floating)
875 sd_event_ref(e);
876
877 LIST_PREPEND(sources, e->sources, s);
878 e->n_sources++;
879
880 return s;
881 }
882
883 _public_ int sd_event_add_io(
884 sd_event *e,
885 sd_event_source **ret,
886 int fd,
887 uint32_t events,
888 sd_event_io_handler_t callback,
889 void *userdata) {
890
891 _cleanup_(source_freep) sd_event_source *s = NULL;
892 int r;
893
894 assert_return(e, -EINVAL);
895 assert_return(e = event_resolve(e), -ENOPKG);
896 assert_return(fd >= 0, -EBADF);
897 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
898 assert_return(callback, -EINVAL);
899 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
900 assert_return(!event_pid_changed(e), -ECHILD);
901
902 s = source_new(e, !ret, SOURCE_IO);
903 if (!s)
904 return -ENOMEM;
905
906 s->wakeup = WAKEUP_EVENT_SOURCE;
907 s->io.fd = fd;
908 s->io.events = events;
909 s->io.callback = callback;
910 s->userdata = userdata;
911 s->enabled = SD_EVENT_ON;
912
913 r = source_io_register(s, s->enabled, events);
914 if (r < 0)
915 return r;
916
917 if (ret)
918 *ret = s;
919 TAKE_PTR(s);
920
921 return 0;
922 }
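/* Illustrative use of sd_event_add_io() (a sketch, not taken from this file): register a non-blocking
 * fd for read events; the handler matches sd_event_io_handler_t.
 *
 *         static int on_readable(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *                 char buf[4096];
 *                 ssize_t n;
 *
 *                 n = read(fd, buf, sizeof(buf));          // drain the fd so that EPOLLIN clears
 *                 if (n < 0 && errno != EAGAIN)
 *                         return -errno;
 *
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_io(e, NULL, fd, EPOLLIN, on_readable, NULL);
 *
 * Passing NULL for the source return pointer makes the source "floating", i.e. owned by the event loop
 * itself (see the !ret argument passed to source_new() above).
 */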
923
924 static void initialize_perturb(sd_event *e) {
925 sd_id128_t bootid = {};
926
927 /* When we sleep for longer, we try to realign the wakeup to
928 the same time within each minute/second/250ms, so that
929 events all across the system can be coalesced into a single
930 CPU wakeup. However, let's take some system-specific
931 randomness for this value, so that in a network of systems
932 with synced clocks timer events are distributed a
933 bit. Here, we calculate a perturbation usec offset from the
934 boot ID. */
935
936 if (_likely_(e->perturb != USEC_INFINITY))
937 return;
938
939 if (sd_id128_get_boot(&bootid) >= 0)
940 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
941 }
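/* Worked example for the calculation above (numbers made up): if the two boot ID words XOR to a value
 * whose remainder modulo USEC_PER_MINUTE is 17300000, then per-minute coalesced timers on this machine
 * realign to second 17.3 of each minute, while a machine with a different boot ID picks a different
 * offset, so a fleet with synchronized clocks does not wake up in lockstep. */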
942
943 static int event_setup_timer_fd(
944 sd_event *e,
945 struct clock_data *d,
946 clockid_t clock) {
947
948 struct epoll_event ev;
949 int r, fd;
950
951 assert(e);
952 assert(d);
953
954 if (_likely_(d->fd >= 0))
955 return 0;
956
957 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
958 if (fd < 0)
959 return -errno;
960
961 fd = fd_move_above_stdio(fd);
962
963 ev = (struct epoll_event) {
964 .events = EPOLLIN,
965 .data.ptr = d,
966 };
967
968 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
969 if (r < 0) {
970 safe_close(fd);
971 return -errno;
972 }
973
974 d->fd = fd;
975 return 0;
976 }
977
978 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
979 assert(s);
980
981 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
982 }
983
984 _public_ int sd_event_add_time(
985 sd_event *e,
986 sd_event_source **ret,
987 clockid_t clock,
988 uint64_t usec,
989 uint64_t accuracy,
990 sd_event_time_handler_t callback,
991 void *userdata) {
992
993 EventSourceType type;
994 _cleanup_(source_freep) sd_event_source *s = NULL;
995 struct clock_data *d;
996 int r;
997
998 assert_return(e, -EINVAL);
999 assert_return(e = event_resolve(e), -ENOPKG);
1000 assert_return(accuracy != (uint64_t) -1, -EINVAL);
1001 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1002 assert_return(!event_pid_changed(e), -ECHILD);
1003
1004 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
1005 return -EOPNOTSUPP;
1006
1007 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1008 if (type < 0)
1009 return -EOPNOTSUPP;
1010
1011 if (!callback)
1012 callback = time_exit_callback;
1013
1014 d = event_get_clock_data(e, type);
1015 assert(d);
1016
1017 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1018 if (r < 0)
1019 return r;
1020
1021 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1022 if (r < 0)
1023 return r;
1024
1025 if (d->fd < 0) {
1026 r = event_setup_timer_fd(e, d, clock);
1027 if (r < 0)
1028 return r;
1029 }
1030
1031 s = source_new(e, !ret, type);
1032 if (!s)
1033 return -ENOMEM;
1034
1035 s->time.next = usec;
1036 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1037 s->time.callback = callback;
1038 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1039 s->userdata = userdata;
1040 s->enabled = SD_EVENT_ONESHOT;
1041
1042 d->needs_rearm = true;
1043
1044 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1045 if (r < 0)
1046 return r;
1047
1048 r = prioq_put(d->latest, s, &s->time.latest_index);
1049 if (r < 0)
1050 return r;
1051
1052 if (ret)
1053 *ret = s;
1054 TAKE_PTR(s);
1055
1056 return 0;
1057 }
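/* Illustrative use of sd_event_add_time() (a sketch): the usec argument is an absolute time on the given
 * clock, so a relative timeout is expressed by adding to the current time; now() and usec_add() come from
 * time-util.h, which is already included above.
 *
 *         static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *                 log_debug("timer elapsed");
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                               usec_add(now(CLOCK_MONOTONIC), 5 * USEC_PER_SEC),  // fire in ~5s
 *                               0,     // accuracy 0 selects DEFAULT_ACCURACY_USEC, see above
 *                               on_timer, NULL);
 */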
1058
1059 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1060 assert(s);
1061
1062 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1063 }
1064
1065 _public_ int sd_event_add_signal(
1066 sd_event *e,
1067 sd_event_source **ret,
1068 int sig,
1069 sd_event_signal_handler_t callback,
1070 void *userdata) {
1071
1072 _cleanup_(source_freep) sd_event_source *s = NULL;
1073 struct signal_data *d;
1074 sigset_t ss;
1075 int r;
1076
1077 assert_return(e, -EINVAL);
1078 assert_return(e = event_resolve(e), -ENOPKG);
1079 assert_return(SIGNAL_VALID(sig), -EINVAL);
1080 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1081 assert_return(!event_pid_changed(e), -ECHILD);
1082
1083 if (!callback)
1084 callback = signal_exit_callback;
1085
1086 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1087 if (r != 0)
1088 return -r;
1089
1090 if (!sigismember(&ss, sig))
1091 return -EBUSY;
1092
1093 if (!e->signal_sources) {
1094 e->signal_sources = new0(sd_event_source*, _NSIG);
1095 if (!e->signal_sources)
1096 return -ENOMEM;
1097 } else if (e->signal_sources[sig])
1098 return -EBUSY;
1099
1100 s = source_new(e, !ret, SOURCE_SIGNAL);
1101 if (!s)
1102 return -ENOMEM;
1103
1104 s->signal.sig = sig;
1105 s->signal.callback = callback;
1106 s->userdata = userdata;
1107 s->enabled = SD_EVENT_ON;
1108
1109 e->signal_sources[sig] = s;
1110
1111 r = event_make_signal_data(e, sig, &d);
1112 if (r < 0)
1113 return r;
1114
1115 /* Use the signal name as description for the event source by default */
1116 (void) sd_event_source_set_description(s, signal_to_string(sig));
1117
1118 if (ret)
1119 *ret = s;
1120 TAKE_PTR(s);
1121
1122 return 0;
1123 }
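/* Illustrative use of sd_event_add_signal() (a sketch): the signal must already be blocked in the calling
 * thread, otherwise -EBUSY is returned (see the pthread_sigmask() check above); sigprocmask_many() is the
 * helper from signal-util.h, included above.
 *
 *         static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *                 return sd_event_exit(sd_event_source_get_event(s), 0);
 *         }
 *
 *         assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
 *         r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *
 * Passing a NULL callback installs signal_exit_callback() instead, i.e. receipt of the signal exits the
 * loop, using PTR_TO_INT(userdata) as the exit code.
 */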
1124
1125 _public_ int sd_event_add_child(
1126 sd_event *e,
1127 sd_event_source **ret,
1128 pid_t pid,
1129 int options,
1130 sd_event_child_handler_t callback,
1131 void *userdata) {
1132
1133 _cleanup_(source_freep) sd_event_source *s = NULL;
1134 int r;
1135
1136 assert_return(e, -EINVAL);
1137 assert_return(e = event_resolve(e), -ENOPKG);
1138 assert_return(pid > 1, -EINVAL);
1139 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1140 assert_return(options != 0, -EINVAL);
1141 assert_return(callback, -EINVAL);
1142 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1143 assert_return(!event_pid_changed(e), -ECHILD);
1144
1145 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1146 if (r < 0)
1147 return r;
1148
1149 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1150 return -EBUSY;
1151
1152 s = source_new(e, !ret, SOURCE_CHILD);
1153 if (!s)
1154 return -ENOMEM;
1155
1156 s->child.pid = pid;
1157 s->child.options = options;
1158 s->child.callback = callback;
1159 s->userdata = userdata;
1160 s->enabled = SD_EVENT_ONESHOT;
1161
1162 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1163 if (r < 0)
1164 return r;
1165
1166 e->n_enabled_child_sources++;
1167
1168 r = event_make_signal_data(e, SIGCHLD, NULL);
1169 if (r < 0) {
1170 e->n_enabled_child_sources--;
1171 return r;
1172 }
1173
1174 e->need_process_child = true;
1175
1176 if (ret)
1177 *ret = s;
1178 TAKE_PTR(s);
1179
1180 return 0;
1181 }
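/* Illustrative use of sd_event_add_child() (a sketch): watch a forked child for termination. The child
 * machinery above is driven by SIGCHLD via the signalfd, so the caller is expected to have SIGCHLD
 * blocked before adding child sources.
 *
 *         static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *                 log_debug("child exited with status %i", si->si_status);
 *                 return 0;
 *         }
 *
 *         pid_t pid = fork();
 *         if (pid < 0)
 *                 return -errno;
 *         if (pid == 0)
 *                 _exit(EXIT_SUCCESS);           // child exits immediately
 *         r = sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 */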
1182
1183 _public_ int sd_event_add_defer(
1184 sd_event *e,
1185 sd_event_source **ret,
1186 sd_event_handler_t callback,
1187 void *userdata) {
1188
1189 _cleanup_(source_freep) sd_event_source *s = NULL;
1190 int r;
1191
1192 assert_return(e, -EINVAL);
1193 assert_return(e = event_resolve(e), -ENOPKG);
1194 assert_return(callback, -EINVAL);
1195 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1196 assert_return(!event_pid_changed(e), -ECHILD);
1197
1198 s = source_new(e, !ret, SOURCE_DEFER);
1199 if (!s)
1200 return -ENOMEM;
1201
1202 s->defer.callback = callback;
1203 s->userdata = userdata;
1204 s->enabled = SD_EVENT_ONESHOT;
1205
1206 r = source_set_pending(s, true);
1207 if (r < 0)
1208 return r;
1209
1210 if (ret)
1211 *ret = s;
1212 TAKE_PTR(s);
1213
1214 return 0;
1215 }
1216
1217 _public_ int sd_event_add_post(
1218 sd_event *e,
1219 sd_event_source **ret,
1220 sd_event_handler_t callback,
1221 void *userdata) {
1222
1223 _cleanup_(source_freep) sd_event_source *s = NULL;
1224 int r;
1225
1226 assert_return(e, -EINVAL);
1227 assert_return(e = event_resolve(e), -ENOPKG);
1228 assert_return(callback, -EINVAL);
1229 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1230 assert_return(!event_pid_changed(e), -ECHILD);
1231
1232 r = set_ensure_allocated(&e->post_sources, NULL);
1233 if (r < 0)
1234 return r;
1235
1236 s = source_new(e, !ret, SOURCE_POST);
1237 if (!s)
1238 return -ENOMEM;
1239
1240 s->post.callback = callback;
1241 s->userdata = userdata;
1242 s->enabled = SD_EVENT_ON;
1243
1244 r = set_put(e->post_sources, s);
1245 if (r < 0)
1246 return r;
1247
1248 if (ret)
1249 *ret = s;
1250 TAKE_PTR(s);
1251
1252 return 0;
1253 }
1254
1255 _public_ int sd_event_add_exit(
1256 sd_event *e,
1257 sd_event_source **ret,
1258 sd_event_handler_t callback,
1259 void *userdata) {
1260
1261 _cleanup_(source_freep) sd_event_source *s = NULL;
1262 int r;
1263
1264 assert_return(e, -EINVAL);
1265 assert_return(e = event_resolve(e), -ENOPKG);
1266 assert_return(callback, -EINVAL);
1267 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1268 assert_return(!event_pid_changed(e), -ECHILD);
1269
1270 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1271 if (r < 0)
1272 return r;
1273
1274 s = source_new(e, !ret, SOURCE_EXIT);
1275 if (!s)
1276 return -ENOMEM;
1277
1278 s->exit.callback = callback;
1279 s->userdata = userdata;
1280 s->exit.prioq_index = PRIOQ_IDX_NULL;
1281 s->enabled = SD_EVENT_ONESHOT;
1282
1283 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1284 if (r < 0)
1285 return r;
1286
1287 if (ret)
1288 *ret = s;
1289 TAKE_PTR(s);
1290
1291 return 0;
1292 }
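/* Illustrative use of sd_event_add_exit() (a sketch): exit sources are not dispatched during normal
 * operation but only once sd_event_exit() has been called, in priority order, which makes them a natural
 * place for cleanup work.
 *
 *         static int on_exit_cleanup(sd_event_source *s, void *userdata) {
 *                 log_debug("cleaning up before the loop returns");
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_exit(e, NULL, on_exit_cleanup, NULL);
 */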
1293
1294 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1295 assert(e);
1296
1297 if (!d)
1298 return;
1299
1300 assert(hashmap_isempty(d->inodes));
1301 assert(hashmap_isempty(d->wd));
1302
1303 if (d->buffer_filled > 0)
1304 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1305
1306 hashmap_free(d->inodes);
1307 hashmap_free(d->wd);
1308
1309 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1310
1311 if (d->fd >= 0) {
1312 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1313 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1314
1315 safe_close(d->fd);
1316 }
1317 free(d);
1318 }
1319
1320 static int event_make_inotify_data(
1321 sd_event *e,
1322 int64_t priority,
1323 struct inotify_data **ret) {
1324
1325 _cleanup_close_ int fd = -1;
1326 struct inotify_data *d;
1327 struct epoll_event ev;
1328 int r;
1329
1330 assert(e);
1331
1332 d = hashmap_get(e->inotify_data, &priority);
1333 if (d) {
1334 if (ret)
1335 *ret = d;
1336 return 0;
1337 }
1338
1339 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1340 if (fd < 0)
1341 return -errno;
1342
1343 fd = fd_move_above_stdio(fd);
1344
1345 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1346 if (r < 0)
1347 return r;
1348
1349 d = new(struct inotify_data, 1);
1350 if (!d)
1351 return -ENOMEM;
1352
1353 *d = (struct inotify_data) {
1354 .wakeup = WAKEUP_INOTIFY_DATA,
1355 .fd = TAKE_FD(fd),
1356 .priority = priority,
1357 };
1358
1359 r = hashmap_put(e->inotify_data, &d->priority, d);
1360 if (r < 0) {
1361 d->fd = safe_close(d->fd);
1362 free(d);
1363 return r;
1364 }
1365
1366 ev = (struct epoll_event) {
1367 .events = EPOLLIN,
1368 .data.ptr = d,
1369 };
1370
1371 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1372 r = -errno;
1373 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1374 * remove the fd from the epoll first, which we don't want as we couldn't
1375 * add it in the first place. */
1376 event_free_inotify_data(e, d);
1377 return r;
1378 }
1379
1380 if (ret)
1381 *ret = d;
1382
1383 return 1;
1384 }
1385
1386 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1387 int r;
1388
1389 assert(x);
1390 assert(y);
1391
1392 r = CMP(x->dev, y->dev);
1393 if (r != 0)
1394 return r;
1395
1396 return CMP(x->ino, y->ino);
1397 }
1398
1399 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1400 assert(d);
1401
1402 siphash24_compress(&d->dev, sizeof(d->dev), state);
1403 siphash24_compress(&d->ino, sizeof(d->ino), state);
1404 }
1405
1406 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1407
1408 static void event_free_inode_data(
1409 sd_event *e,
1410 struct inode_data *d) {
1411
1412 assert(e);
1413
1414 if (!d)
1415 return;
1416
1417 assert(!d->event_sources);
1418
1419 if (d->fd >= 0) {
1420 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1421 safe_close(d->fd);
1422 }
1423
1424 if (d->inotify_data) {
1425
1426 if (d->wd >= 0) {
1427 if (d->inotify_data->fd >= 0) {
1428 /* So here's a problem. At the time this runs the watch descriptor might already be
1429 * invalidated, because an IN_IGNORED event might be queued right at the moment we enter
1430 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since this is quite
1431 * likely to happen. */
1432
1433 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1434 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1435 }
1436
1437 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1438 }
1439
1440 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1441 }
1442
1443 free(d);
1444 }
1445
1446 static void event_gc_inode_data(
1447 sd_event *e,
1448 struct inode_data *d) {
1449
1450 struct inotify_data *inotify_data;
1451
1452 assert(e);
1453
1454 if (!d)
1455 return;
1456
1457 if (d->event_sources)
1458 return;
1459
1460 inotify_data = d->inotify_data;
1461 event_free_inode_data(e, d);
1462
1463 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1464 event_free_inotify_data(e, inotify_data);
1465 }
1466
1467 static int event_make_inode_data(
1468 sd_event *e,
1469 struct inotify_data *inotify_data,
1470 dev_t dev,
1471 ino_t ino,
1472 struct inode_data **ret) {
1473
1474 struct inode_data *d, key;
1475 int r;
1476
1477 assert(e);
1478 assert(inotify_data);
1479
1480 key = (struct inode_data) {
1481 .ino = ino,
1482 .dev = dev,
1483 };
1484
1485 d = hashmap_get(inotify_data->inodes, &key);
1486 if (d) {
1487 if (ret)
1488 *ret = d;
1489
1490 return 0;
1491 }
1492
1493 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1494 if (r < 0)
1495 return r;
1496
1497 d = new(struct inode_data, 1);
1498 if (!d)
1499 return -ENOMEM;
1500
1501 *d = (struct inode_data) {
1502 .dev = dev,
1503 .ino = ino,
1504 .wd = -1,
1505 .fd = -1,
1506 .inotify_data = inotify_data,
1507 };
1508
1509 r = hashmap_put(inotify_data->inodes, d, d);
1510 if (r < 0) {
1511 free(d);
1512 return r;
1513 }
1514
1515 if (ret)
1516 *ret = d;
1517
1518 return 1;
1519 }
1520
1521 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1522 bool excl_unlink = true;
1523 uint32_t combined = 0;
1524 sd_event_source *s;
1525
1526 assert(d);
1527
1528 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1529 * the IN_EXCL_UNLINK flag is ANDed instead.
1530 *
1531 * Note that we add all sources to the mask here, regardless of whether they are enabled, disabled or oneshot. That's
1532 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1533 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1534 * events we don't care for client-side. */
1535
1536 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1537
1538 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1539 excl_unlink = false;
1540
1541 combined |= s->inotify.mask;
1542 }
1543
1544 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1545 }
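/* Worked example for the combination above: if one source on this inode watches
 * IN_CLOSE_WRITE|IN_EXCL_UNLINK and another watches IN_MOVED_TO (without IN_EXCL_UNLINK), the realized
 * kernel mask is IN_CLOSE_WRITE|IN_MOVED_TO: the event bits are ORed together, while IN_EXCL_UNLINK is
 * dropped because not every source requested it. */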
1546
1547 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1548 uint32_t combined_mask;
1549 int wd, r;
1550
1551 assert(d);
1552 assert(d->fd >= 0);
1553
1554 combined_mask = inode_data_determine_mask(d);
1555
1556 if (d->wd >= 0 && combined_mask == d->combined_mask)
1557 return 0;
1558
1559 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1560 if (r < 0)
1561 return r;
1562
1563 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1564 if (wd < 0)
1565 return -errno;
1566
1567 if (d->wd < 0) {
1568 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1569 if (r < 0) {
1570 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1571 return r;
1572 }
1573
1574 d->wd = wd;
1575
1576 } else if (d->wd != wd) {
1577
1578 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1579 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1580 return -EINVAL;
1581 }
1582
1583 d->combined_mask = combined_mask;
1584 return 1;
1585 }
1586
1587 _public_ int sd_event_add_inotify(
1588 sd_event *e,
1589 sd_event_source **ret,
1590 const char *path,
1591 uint32_t mask,
1592 sd_event_inotify_handler_t callback,
1593 void *userdata) {
1594
1595 struct inotify_data *inotify_data = NULL;
1596 struct inode_data *inode_data = NULL;
1597 _cleanup_close_ int fd = -1;
1598 _cleanup_(source_freep) sd_event_source *s = NULL;
1599 struct stat st;
1600 int r;
1601
1602 assert_return(e, -EINVAL);
1603 assert_return(e = event_resolve(e), -ENOPKG);
1604 assert_return(path, -EINVAL);
1605 assert_return(callback, -EINVAL);
1606 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1607 assert_return(!event_pid_changed(e), -ECHILD);
1608
1609 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1610 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD-type operations for you, hence
1611 * callers cannot pass that flag in themselves. */
1612 if (mask & IN_MASK_ADD)
1613 return -EINVAL;
1614
1615 fd = open(path, O_PATH|O_CLOEXEC|
1616 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1617 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1618 if (fd < 0)
1619 return -errno;
1620
1621 if (fstat(fd, &st) < 0)
1622 return -errno;
1623
1624 s = source_new(e, !ret, SOURCE_INOTIFY);
1625 if (!s)
1626 return -ENOMEM;
1627
1628 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1629 s->inotify.mask = mask;
1630 s->inotify.callback = callback;
1631 s->userdata = userdata;
1632
1633 /* Allocate an inotify object for this priority, and an inode object within it */
1634 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1635 if (r < 0)
1636 return r;
1637
1638 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1639 if (r < 0) {
1640 event_free_inotify_data(e, inotify_data);
1641 return r;
1642 }
1643
1644 /* Keep the O_PATH fd around until the first iteration of the loop, so that the priority of the event source
1645 * can still be changed until then, for which we need the original inode. */
1646 if (inode_data->fd < 0) {
1647 inode_data->fd = TAKE_FD(fd);
1648 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1649 }
1650
1651 /* Link our event source to the inode data object */
1652 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1653 s->inotify.inode_data = inode_data;
1654
1655 /* Actually realize the watch now */
1656 r = inode_data_realize_watch(e, inode_data);
1657 if (r < 0)
1658 return r;
1659
1660 (void) sd_event_source_set_description(s, path);
1661
1662 if (ret)
1663 *ret = s;
1664 TAKE_PTR(s);
1665
1666 return 0;
1667 }
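/* Illustrative use of sd_event_add_inotify() (a sketch; the path is made up): watch a directory for files
 * being created or renamed into it; the handler receives the raw struct inotify_event.
 *
 *         static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *                 if (ev->len > 0)
 *                         log_debug("inotify event 0x%x on %s", ev->mask, ev->name);
 *                 return 0;
 *         }
 *
 *         r = sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_MOVED_TO, on_inotify, NULL);
 *
 * Watches on the same inode at the same priority are coalesced (see event_make_inotify_data() and
 * inode_data_realize_watch() above), so several sources for the same path share one watch descriptor.
 */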
1668
1669 static sd_event_source* event_source_free(sd_event_source *s) {
1670 if (!s)
1671 return NULL;
1672
1673 /* Here's a special hack: when we are called from a
1674 * dispatch handler we won't free the event source
1675 * immediately, but we will detach the fd from the
1676 * epoll. This way it is safe for the caller to unref
1677 * the event source and immediately close the fd, but
1678 * we still retain a valid event source object after
1679 * the callback. */
1680
1681 if (s->dispatching) {
1682 if (s->type == SOURCE_IO)
1683 source_io_unregister(s);
1684
1685 source_disconnect(s);
1686 } else
1687 source_free(s);
1688
1689 return NULL;
1690 }
1691
1692 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1693
1694 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1695 assert_return(s, -EINVAL);
1696 assert_return(!event_pid_changed(s->event), -ECHILD);
1697
1698 return free_and_strdup(&s->description, description);
1699 }
1700
1701 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1702 assert_return(s, -EINVAL);
1703 assert_return(description, -EINVAL);
1704 assert_return(!event_pid_changed(s->event), -ECHILD);
1705
1706 if (!s->description)
1707 return -ENXIO;
1708
1709 *description = s->description;
1710 return 0;
1711 }
1712
1713 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1714 assert_return(s, NULL);
1715
1716 return s->event;
1717 }
1718
1719 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1720 assert_return(s, -EINVAL);
1721 assert_return(s->type != SOURCE_EXIT, -EDOM);
1722 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1723 assert_return(!event_pid_changed(s->event), -ECHILD);
1724
1725 return s->pending;
1726 }
1727
1728 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1729 assert_return(s, -EINVAL);
1730 assert_return(s->type == SOURCE_IO, -EDOM);
1731 assert_return(!event_pid_changed(s->event), -ECHILD);
1732
1733 return s->io.fd;
1734 }
1735
1736 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1737 int r;
1738
1739 assert_return(s, -EINVAL);
1740 assert_return(fd >= 0, -EBADF);
1741 assert_return(s->type == SOURCE_IO, -EDOM);
1742 assert_return(!event_pid_changed(s->event), -ECHILD);
1743
1744 if (s->io.fd == fd)
1745 return 0;
1746
1747 if (s->enabled == SD_EVENT_OFF) {
1748 s->io.fd = fd;
1749 s->io.registered = false;
1750 } else {
1751 int saved_fd;
1752
1753 saved_fd = s->io.fd;
1754 assert(s->io.registered);
1755
1756 s->io.fd = fd;
1757 s->io.registered = false;
1758
1759 r = source_io_register(s, s->enabled, s->io.events);
1760 if (r < 0) {
1761 s->io.fd = saved_fd;
1762 s->io.registered = true;
1763 return r;
1764 }
1765
1766 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1767 }
1768
1769 return 0;
1770 }
1771
1772 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1773 assert_return(s, -EINVAL);
1774 assert_return(s->type == SOURCE_IO, -EDOM);
1775
1776 return s->io.owned;
1777 }
1778
1779 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1780 assert_return(s, -EINVAL);
1781 assert_return(s->type == SOURCE_IO, -EDOM);
1782
1783 s->io.owned = own;
1784 return 0;
1785 }
1786
1787 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1788 assert_return(s, -EINVAL);
1789 assert_return(events, -EINVAL);
1790 assert_return(s->type == SOURCE_IO, -EDOM);
1791 assert_return(!event_pid_changed(s->event), -ECHILD);
1792
1793 *events = s->io.events;
1794 return 0;
1795 }
1796
1797 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1798 int r;
1799
1800 assert_return(s, -EINVAL);
1801 assert_return(s->type == SOURCE_IO, -EDOM);
1802 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1803 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1804 assert_return(!event_pid_changed(s->event), -ECHILD);
1805
1806 /* edge-triggered updates are never skipped, so we can reset edges */
1807 if (s->io.events == events && !(events & EPOLLET))
1808 return 0;
1809
1810 r = source_set_pending(s, false);
1811 if (r < 0)
1812 return r;
1813
1814 if (s->enabled != SD_EVENT_OFF) {
1815 r = source_io_register(s, s->enabled, events);
1816 if (r < 0)
1817 return r;
1818 }
1819
1820 s->io.events = events;
1821
1822 return 0;
1823 }
1824
1825 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1826 assert_return(s, -EINVAL);
1827 assert_return(revents, -EINVAL);
1828 assert_return(s->type == SOURCE_IO, -EDOM);
1829 assert_return(s->pending, -ENODATA);
1830 assert_return(!event_pid_changed(s->event), -ECHILD);
1831
1832 *revents = s->io.revents;
1833 return 0;
1834 }
1835
1836 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1837 assert_return(s, -EINVAL);
1838 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1839 assert_return(!event_pid_changed(s->event), -ECHILD);
1840
1841 return s->signal.sig;
1842 }
1843
1844 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1845 assert_return(s, -EINVAL);
1846 assert_return(!event_pid_changed(s->event), -ECHILD);
1847
1848 *priority = s->priority;
1849 return 0;
1850 }
1851
1852 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1853 bool rm_inotify = false, rm_inode = false;
1854 struct inotify_data *new_inotify_data = NULL;
1855 struct inode_data *new_inode_data = NULL;
1856 int r;
1857
1858 assert_return(s, -EINVAL);
1859 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1860 assert_return(!event_pid_changed(s->event), -ECHILD);
1861
1862 if (s->priority == priority)
1863 return 0;
1864
1865 if (s->type == SOURCE_INOTIFY) {
1866 struct inode_data *old_inode_data;
1867
1868 assert(s->inotify.inode_data);
1869 old_inode_data = s->inotify.inode_data;
1870
1871 /* We need the original fd to change the priority. If we don't have it anymore we can't change the
1872 * priority. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1873 * events we allow priority changes only until the first following iteration. */
1874 if (old_inode_data->fd < 0)
1875 return -EOPNOTSUPP;
1876
1877 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1878 if (r < 0)
1879 return r;
1880 rm_inotify = r > 0;
1881
1882 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1883 if (r < 0)
1884 goto fail;
1885 rm_inode = r > 0;
1886
1887 if (new_inode_data->fd < 0) {
1888 /* Duplicate the fd for the new inode object if we don't have any yet */
1889 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1890 if (new_inode_data->fd < 0) {
1891 r = -errno;
1892 goto fail;
1893 }
1894
1895 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1896 }
1897
1898 /* Move the event source to the new inode data structure */
1899 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1900 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1901 s->inotify.inode_data = new_inode_data;
1902
1903 /* Now create the new watch */
1904 r = inode_data_realize_watch(s->event, new_inode_data);
1905 if (r < 0) {
1906 /* Move it back */
1907 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1908 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1909 s->inotify.inode_data = old_inode_data;
1910 goto fail;
1911 }
1912
1913 s->priority = priority;
1914
1915 event_gc_inode_data(s->event, old_inode_data);
1916
1917 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1918 struct signal_data *old, *d;
1919
1920 /* Move us from the signalfd belonging to the old
1921 * priority to the signalfd of the new priority */
1922
1923 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1924
1925 s->priority = priority;
1926
1927 r = event_make_signal_data(s->event, s->signal.sig, &d);
1928 if (r < 0) {
1929 s->priority = old->priority;
1930 return r;
1931 }
1932
1933 event_unmask_signal_data(s->event, old, s->signal.sig);
1934 } else
1935 s->priority = priority;
1936
1937 if (s->pending)
1938 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1939
1940 if (s->prepare)
1941 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1942
1943 if (s->type == SOURCE_EXIT)
1944 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1945
1946 return 0;
1947
1948 fail:
1949 if (rm_inode)
1950 event_free_inode_data(s->event, new_inode_data);
1951
1952 if (rm_inotify)
1953 event_free_inotify_data(s->event, new_inotify_data);
1954
1955 return r;
1956 }
1957
1958 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1959 assert_return(s, -EINVAL);
1960 assert_return(!event_pid_changed(s->event), -ECHILD);
1961
1962 if (m)
1963 *m = s->enabled;
1964 return s->enabled != SD_EVENT_OFF;
1965 }
1966
1967 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1968 int r;
1969
1970 assert_return(s, -EINVAL);
1971 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1972 assert_return(!event_pid_changed(s->event), -ECHILD);
1973
1974 /* If we are dead anyway, we are fine with turning off
1975 * sources, but everything else needs to fail. */
1976 if (s->event->state == SD_EVENT_FINISHED)
1977 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1978
1979 if (s->enabled == m)
1980 return 0;
1981
1982 if (m == SD_EVENT_OFF) {
1983
1984 /* Unset the pending flag when this event source is disabled */
1985 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1986 r = source_set_pending(s, false);
1987 if (r < 0)
1988 return r;
1989 }
1990
1991 switch (s->type) {
1992
1993 case SOURCE_IO:
1994 source_io_unregister(s);
1995 s->enabled = m;
1996 break;
1997
1998 case SOURCE_TIME_REALTIME:
1999 case SOURCE_TIME_BOOTTIME:
2000 case SOURCE_TIME_MONOTONIC:
2001 case SOURCE_TIME_REALTIME_ALARM:
2002 case SOURCE_TIME_BOOTTIME_ALARM: {
2003 struct clock_data *d;
2004
2005 s->enabled = m;
2006 d = event_get_clock_data(s->event, s->type);
2007 assert(d);
2008
2009 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2010 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2011 d->needs_rearm = true;
2012 break;
2013 }
2014
2015 case SOURCE_SIGNAL:
2016 s->enabled = m;
2017
2018 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2019 break;
2020
2021 case SOURCE_CHILD:
2022 s->enabled = m;
2023
2024 assert(s->event->n_enabled_child_sources > 0);
2025 s->event->n_enabled_child_sources--;
2026
2027 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2028 break;
2029
2030 case SOURCE_EXIT:
2031 s->enabled = m;
2032 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2033 break;
2034
2035 case SOURCE_DEFER:
2036 case SOURCE_POST:
2037 case SOURCE_INOTIFY:
2038 s->enabled = m;
2039 break;
2040
2041 default:
2042 assert_not_reached("Wut? I shouldn't exist.");
2043 }
2044
2045 } else {
2046
2047 /* Unset the pending flag when this event source is enabled */
2048 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2049 r = source_set_pending(s, false);
2050 if (r < 0)
2051 return r;
2052 }
2053
2054 switch (s->type) {
2055
2056 case SOURCE_IO:
2057 r = source_io_register(s, m, s->io.events);
2058 if (r < 0)
2059 return r;
2060
2061 s->enabled = m;
2062 break;
2063
2064 case SOURCE_TIME_REALTIME:
2065 case SOURCE_TIME_BOOTTIME:
2066 case SOURCE_TIME_MONOTONIC:
2067 case SOURCE_TIME_REALTIME_ALARM:
2068 case SOURCE_TIME_BOOTTIME_ALARM: {
2069 struct clock_data *d;
2070
2071 s->enabled = m;
2072 d = event_get_clock_data(s->event, s->type);
2073 assert(d);
2074
2075 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2076 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2077 d->needs_rearm = true;
2078 break;
2079 }
2080
2081 case SOURCE_SIGNAL:
2082
2083 s->enabled = m;
2084
2085 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2086 if (r < 0) {
2087 s->enabled = SD_EVENT_OFF;
2088 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2089 return r;
2090 }
2091
2092 break;
2093
2094 case SOURCE_CHILD:
2095
2096 if (s->enabled == SD_EVENT_OFF)
2097 s->event->n_enabled_child_sources++;
2098
2099 s->enabled = m;
2100
2101 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2102 if (r < 0) {
2103 s->enabled = SD_EVENT_OFF;
2104 s->event->n_enabled_child_sources--;
2105 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2106 return r;
2107 }
2108
2109 break;
2110
2111 case SOURCE_EXIT:
2112 s->enabled = m;
2113 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2114 break;
2115
2116 case SOURCE_DEFER:
2117 case SOURCE_POST:
2118 case SOURCE_INOTIFY:
2119 s->enabled = m;
2120 break;
2121
2122 default:
2123 assert_not_reached("Wut? I shouldn't exist.");
2124 }
2125 }
2126
2127 if (s->pending)
2128 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2129
2130 if (s->prepare)
2131 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2132
2133 return 0;
2134 }
2135
2136 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2137 assert_return(s, -EINVAL);
2138 assert_return(usec, -EINVAL);
2139 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2140 assert_return(!event_pid_changed(s->event), -ECHILD);
2141
2142 *usec = s->time.next;
2143 return 0;
2144 }
2145
2146 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2147 struct clock_data *d;
2148 int r;
2149
2150 assert_return(s, -EINVAL);
2151 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2152 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2153 assert_return(!event_pid_changed(s->event), -ECHILD);
2154
2155 r = source_set_pending(s, false);
2156 if (r < 0)
2157 return r;
2158
2159 s->time.next = usec;
2160
2161 d = event_get_clock_data(s->event, s->type);
2162 assert(d);
2163
2164 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2165 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2166 d->needs_rearm = true;
2167
2168 return 0;
2169 }
2170
2171 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2172 assert_return(s, -EINVAL);
2173 assert_return(usec, -EINVAL);
2174 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2175 assert_return(!event_pid_changed(s->event), -ECHILD);
2176
2177 *usec = s->time.accuracy;
2178 return 0;
2179 }
2180
2181 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2182 struct clock_data *d;
2183 int r;
2184
2185 assert_return(s, -EINVAL);
2186 assert_return(usec != (uint64_t) -1, -EINVAL);
2187 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2188 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2189 assert_return(!event_pid_changed(s->event), -ECHILD);
2190
2191 r = source_set_pending(s, false);
2192 if (r < 0)
2193 return r;
2194
2195 if (usec == 0)
2196 usec = DEFAULT_ACCURACY_USEC;
2197
2198 s->time.accuracy = usec;
2199
2200 d = event_get_clock_data(s->event, s->type);
2201 assert(d);
2202
2203 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2204 d->needs_rearm = true;
2205
2206 return 0;
2207 }
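/* Editor's note: an illustrative sketch (not part of sd-event.c) of how a caller can pick a generous
 * accuracy so that a timer becomes eligible for coalescing: the accuracy widens the [earliest, latest]
 * window handed to sleep_between() below. The handler name and the 5s/1s values are hypothetical. */
#if 0
static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
        /* Fires once, roughly five seconds from now, possibly up to one second later than asked for. */
        return 0;
}

static int add_coalescable_timer(sd_event *e) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        /* An accuracy of one second lets this wakeup be merged with others inside that window. */
        return sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
                                 now_usec + 5 * USEC_PER_SEC, USEC_PER_SEC,
                                 on_timer, NULL);
}
#endif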
2208
2209 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2210 assert_return(s, -EINVAL);
2211 assert_return(clock, -EINVAL);
2212 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2213 assert_return(!event_pid_changed(s->event), -ECHILD);
2214
2215 *clock = event_source_type_to_clock(s->type);
2216 return 0;
2217 }
2218
2219 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2220 assert_return(s, -EINVAL);
2221 assert_return(pid, -EINVAL);
2222 assert_return(s->type == SOURCE_CHILD, -EDOM);
2223 assert_return(!event_pid_changed(s->event), -ECHILD);
2224
2225 *pid = s->child.pid;
2226 return 0;
2227 }
2228
2229 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2230 assert_return(s, -EINVAL);
2231 assert_return(mask, -EINVAL);
2232 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2233 assert_return(!event_pid_changed(s->event), -ECHILD);
2234
2235 *mask = s->inotify.mask;
2236 return 0;
2237 }
2238
2239 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2240 int r;
2241
2242 assert_return(s, -EINVAL);
2243 assert_return(s->type != SOURCE_EXIT, -EDOM);
2244 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2245 assert_return(!event_pid_changed(s->event), -ECHILD);
2246
2247 if (s->prepare == callback)
2248 return 0;
2249
2250 if (callback && s->prepare) {
2251 s->prepare = callback;
2252 return 0;
2253 }
2254
2255 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2256 if (r < 0)
2257 return r;
2258
2259 s->prepare = callback;
2260
2261 if (callback) {
2262 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2263 if (r < 0)
2264 return r;
2265 } else
2266 prioq_remove(s->event->prepare, s, &s->prepare_index);
2267
2268 return 0;
2269 }
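/* Editor's note: an illustrative sketch (not part of sd-event.c). A prepare callback runs right before
 * the loop polls (see event_prepare() below), which makes it a convenient place to (de)activate a source
 * based on state that may have changed since the last iteration. "struct my_queue", "prepare_work" and
 * "on_work" are hypothetical. */
#if 0
struct my_queue {
        unsigned n_items;
};

static int prepare_work(sd_event_source *s, void *userdata) {
        struct my_queue *q = userdata;

        /* Only let the defer source fire when there is actually work queued. */
        return sd_event_source_set_enabled(s, q->n_items > 0 ? SD_EVENT_ONESHOT : SD_EVENT_OFF);
}

/* After sd_event_add_defer(e, &s, on_work, q):
 *         sd_event_source_set_prepare(s, prepare_work);                                      */
#endif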
2270
2271 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2272 assert_return(s, NULL);
2273
2274 return s->userdata;
2275 }
2276
2277 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2278 void *ret;
2279
2280 assert_return(s, NULL);
2281
2282 ret = s->userdata;
2283 s->userdata = userdata;
2284
2285 return ret;
2286 }
2287
2288 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2289 usec_t c;
2290 assert(e);
2291 assert(a <= b);
2292
2293 if (a <= 0)
2294 return 0;
2295 if (a >= USEC_INFINITY)
2296 return USEC_INFINITY;
2297
2298 if (b <= a + 1)
2299 return a;
2300
2301 initialize_perturb(e);
2302
2303 /*
2304 Find a good time to wake up again between times a and b. We
2305 have two goals here:
2306
2307 a) We want to wake up as seldom as possible, hence prefer
2308 later times over earlier times.
2309
2310 b) But if we have to wake up, then let's make sure to
2311 dispatch as much as possible on the entire system.
2312
2313 We implement this by waking up everywhere at the same time
2314 within any given minute if we can, synchronised via the
2315 perturbation value determined from the boot ID. If we can't,
2316 then we try to find the same spot within every 10s, then 1s and
2317 then 250ms window. Otherwise, we pick the last possible time
2318 to wake up.
2319 */
2320
2321 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2322 if (c >= b) {
2323 if (_unlikely_(c < USEC_PER_MINUTE))
2324 return b;
2325
2326 c -= USEC_PER_MINUTE;
2327 }
2328
2329 if (c >= a)
2330 return c;
2331
2332 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2333 if (c >= b) {
2334 if (_unlikely_(c < USEC_PER_SEC*10))
2335 return b;
2336
2337 c -= USEC_PER_SEC*10;
2338 }
2339
2340 if (c >= a)
2341 return c;
2342
2343 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2344 if (c >= b) {
2345 if (_unlikely_(c < USEC_PER_SEC))
2346 return b;
2347
2348 c -= USEC_PER_SEC;
2349 }
2350
2351 if (c >= a)
2352 return c;
2353
2354 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2355 if (c >= b) {
2356 if (_unlikely_(c < USEC_PER_MSEC*250))
2357 return b;
2358
2359 c -= USEC_PER_MSEC*250;
2360 }
2361
2362 if (c >= a)
2363 return c;
2364
2365 return b;
2366 }
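/* Editor's note: a worked example of the coalescing above, with hypothetical numbers. Say
 * e->perturb is 13s into the minute, and a timer may fire anywhere in the window a=100s,
 * b=130s (absolute CLOCK_MONOTONIC values, in seconds for readability):
 *
 *   minute step: c = (130/60)*60 + 13 = 133        -> >= b, subtract 60 -> 73,  which is < a, no fit
 *   10s step:    c = (130/10)*10 + 13%10 = 133     -> >= b, subtract 10 -> 123, which is >= a -> return 123
 *
 * Every timer on this machine whose window covers second 123 therefore wakes up at exactly that
 * instant, sharing a single wakeup instead of each picking its own. */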
2367
2368 static int event_arm_timer(
2369 sd_event *e,
2370 struct clock_data *d) {
2371
2372 struct itimerspec its = {};
2373 sd_event_source *a, *b;
2374 usec_t t;
2375 int r;
2376
2377 assert(e);
2378 assert(d);
2379
2380 if (!d->needs_rearm)
2381 return 0;
2382 else
2383 d->needs_rearm = false;
2384
2385 a = prioq_peek(d->earliest);
2386 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2387
2388 if (d->fd < 0)
2389 return 0;
2390
2391 if (d->next == USEC_INFINITY)
2392 return 0;
2393
2394 /* disarm */
2395 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2396 if (r < 0)
2397 return r;
2398
2399 d->next = USEC_INFINITY;
2400 return 0;
2401 }
2402
2403 b = prioq_peek(d->latest);
2404 assert_se(b && b->enabled != SD_EVENT_OFF);
2405
2406 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2407 if (d->next == t)
2408 return 0;
2409
2410 assert_se(d->fd >= 0);
2411
2412 if (t == 0) {
2413 /* We don't want to disarm here, we just want the timer to fire at some time long ago, i.e. immediately. */
2414 its.it_value.tv_sec = 0;
2415 its.it_value.tv_nsec = 1;
2416 } else
2417 timespec_store(&its.it_value, t);
2418
2419 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2420 if (r < 0)
2421 return -errno;
2422
2423 d->next = t;
2424 return 0;
2425 }
2426
2427 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2428 assert(e);
2429 assert(s);
2430 assert(s->type == SOURCE_IO);
2431
2432 /* If the event source was already pending, we just OR in the
2433 * new revents, otherwise we reset the value. The ORing is
2434 * necessary to handle EPOLLONESHOT events properly where
2435 * readability might happen independently of writability, and
2436 * we need to keep track of both */
2437
2438 if (s->pending)
2439 s->io.revents |= revents;
2440 else
2441 s->io.revents = revents;
2442
2443 return source_set_pending(s, true);
2444 }
2445
2446 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2447 uint64_t x;
2448 ssize_t ss;
2449
2450 assert(e);
2451 assert(fd >= 0);
2452
2453 assert_return(events == EPOLLIN, -EIO);
2454
2455 ss = read(fd, &x, sizeof(x));
2456 if (ss < 0) {
2457 if (IN_SET(errno, EAGAIN, EINTR))
2458 return 0;
2459
2460 return -errno;
2461 }
2462
2463 if (_unlikely_(ss != sizeof(x)))
2464 return -EIO;
2465
2466 if (next)
2467 *next = USEC_INFINITY;
2468
2469 return 0;
2470 }
2471
2472 static int process_timer(
2473 sd_event *e,
2474 usec_t n,
2475 struct clock_data *d) {
2476
2477 sd_event_source *s;
2478 int r;
2479
2480 assert(e);
2481 assert(d);
2482
2483 for (;;) {
2484 s = prioq_peek(d->earliest);
2485 if (!s ||
2486 s->time.next > n ||
2487 s->enabled == SD_EVENT_OFF ||
2488 s->pending)
2489 break;
2490
2491 r = source_set_pending(s, true);
2492 if (r < 0)
2493 return r;
2494
2495 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2496 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2497 d->needs_rearm = true;
2498 }
2499
2500 return 0;
2501 }
2502
2503 static int process_child(sd_event *e) {
2504 sd_event_source *s;
2505 Iterator i;
2506 int r;
2507
2508 assert(e);
2509
2510 e->need_process_child = false;
2511
2512 /*
2513 So, this is ugly. We iteratively invoke waitid() with P_PID
2514 + WNOHANG for each PID we wait for, instead of using
2515 P_ALL. This is because we only want to get child
2516 information of very specific child processes, and not all
2517 of them. We might not have processed the SIGCHLD event of a
2518 previous invocation and we don't want to maintain an
2519 unbounded *per-child* event queue, hence we really don't
2520 want anything flushed out of the kernel's queue that we
2521 don't care about. Since this is O(n), if you
2522 have a lot of processes you probably want to handle SIGCHLD
2523 yourself.
2524
2525 We do not reap the children here (by using WNOWAIT); that
2526 is only done after the event source is dispatched, so that
2527 the callback still sees the process as a zombie.
2528 */
2529
2530 HASHMAP_FOREACH(s, e->child_sources, i) {
2531 assert(s->type == SOURCE_CHILD);
2532
2533 if (s->pending)
2534 continue;
2535
2536 if (s->enabled == SD_EVENT_OFF)
2537 continue;
2538
2539 zero(s->child.siginfo);
2540 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2541 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2542 if (r < 0)
2543 return -errno;
2544
2545 if (s->child.siginfo.si_pid != 0) {
2546 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2547
2548 if (!zombie && (s->child.options & WEXITED)) {
2549 /* If the child isn't dead then let's
2550 * immediately remove the state change
2551 * from the queue, since there's no
2552 * benefit in leaving it queued */
2553
2554 assert(s->child.options & (WSTOPPED|WCONTINUED));
2555 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2556 }
2557
2558 r = source_set_pending(s, true);
2559 if (r < 0)
2560 return r;
2561 }
2562 }
2563
2564 return 0;
2565 }
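/* Editor's note: an illustrative caller-side sketch (not part of sd-event.c) of the WNOWAIT
 * behaviour described above: the handler is invoked while the child is still a zombie, and the
 * loop reaps the PID only after the callback returns (see source_dispatch()). The handler name
 * and child_pid are hypothetical; SIGCHLD must be blocked before sd_event_add_child() is used. */
#if 0
static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
        if (si->si_code == CLD_EXITED)
                log_info("Child exited with status %i", si->si_status);
        return 0;
}

static int watch_child(sd_event *e, pid_t child_pid) {
        sigset_t mask;

        /* sd-event dequeues SIGCHLD via signalfd, so it must not be delivered asynchronously. */
        sigemptyset(&mask);
        sigaddset(&mask, SIGCHLD);
        if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
                return -errno;

        return sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL);
}
#endif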
2566
2567 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2568 bool read_one = false;
2569 int r;
2570
2571 assert(e);
2572 assert(d);
2573 assert_return(events == EPOLLIN, -EIO);
2574
2575 /* If there's a signal queued on this priority and SIGCHLD is
2576 on this priority too, then make sure to recheck the
2577 children we watch. This is because we only ever dequeue
2578 the first signal per priority; if we dequeue one, a
2579 SIGCHLD might be enqueued behind it and we wouldn't know,
2580 but we might have higher-priority children we care about,
2581 hence we need to check them explicitly. */
2582
2583 if (sigismember(&d->sigset, SIGCHLD))
2584 e->need_process_child = true;
2585
2586 /* If there's already an event source pending for this
2587 * priority we don't read another */
2588 if (d->current)
2589 return 0;
2590
2591 for (;;) {
2592 struct signalfd_siginfo si;
2593 ssize_t n;
2594 sd_event_source *s = NULL;
2595
2596 n = read(d->fd, &si, sizeof(si));
2597 if (n < 0) {
2598 if (IN_SET(errno, EAGAIN, EINTR))
2599 return read_one;
2600
2601 return -errno;
2602 }
2603
2604 if (_unlikely_(n != sizeof(si)))
2605 return -EIO;
2606
2607 assert(SIGNAL_VALID(si.ssi_signo));
2608
2609 read_one = true;
2610
2611 if (e->signal_sources)
2612 s = e->signal_sources[si.ssi_signo];
2613 if (!s)
2614 continue;
2615 if (s->pending)
2616 continue;
2617
2618 s->signal.siginfo = si;
2619 d->current = s;
2620
2621 r = source_set_pending(s, true);
2622 if (r < 0)
2623 return r;
2624
2625 return 1;
2626 }
2627 }
2628
2629 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2630 ssize_t n;
2631
2632 assert(e);
2633 assert(d);
2634
2635 assert_return(revents == EPOLLIN, -EIO);
2636
2637 /* If there's already an event source pending for this priority, don't read another */
2638 if (d->n_pending > 0)
2639 return 0;
2640
2641 /* Is the read buffer non-empty? If so, let's not read more */
2642 if (d->buffer_filled > 0)
2643 return 0;
2644
2645 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2646 if (n < 0) {
2647 if (IN_SET(errno, EAGAIN, EINTR))
2648 return 0;
2649
2650 return -errno;
2651 }
2652
2653 assert(n > 0);
2654 d->buffer_filled = (size_t) n;
2655 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2656
2657 return 1;
2658 }
2659
2660 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2661 assert(e);
2662 assert(d);
2663 assert(sz <= d->buffer_filled);
2664
2665 if (sz == 0)
2666 return;
2667
2668 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2669 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2670 d->buffer_filled -= sz;
2671
2672 if (d->buffer_filled == 0)
2673 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2674 }
2675
2676 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2677 int r;
2678
2679 assert(e);
2680 assert(d);
2681
2682 /* If there's already an event source pending for this priority, don't read another */
2683 if (d->n_pending > 0)
2684 return 0;
2685
2686 while (d->buffer_filled > 0) {
2687 size_t sz;
2688
2689 /* Let's validate that the event structures are complete */
2690 if (d->buffer_filled < offsetof(struct inotify_event, name))
2691 return -EIO;
2692
2693 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2694 if (d->buffer_filled < sz)
2695 return -EIO;
2696
2697 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2698 struct inode_data *inode_data;
2699 Iterator i;
2700
2701 /* The queue overran, let's pass this event to all event sources connected to this inotify
2702 * object */
2703
2704 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2705 sd_event_source *s;
2706
2707 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2708
2709 if (s->enabled == SD_EVENT_OFF)
2710 continue;
2711
2712 r = source_set_pending(s, true);
2713 if (r < 0)
2714 return r;
2715 }
2716 }
2717 } else {
2718 struct inode_data *inode_data;
2719 sd_event_source *s;
2720
2721 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2722 * our watch descriptor table. */
2723 if (d->buffer.ev.mask & IN_IGNORED) {
2724
2725 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2726 if (!inode_data) {
2727 event_inotify_data_drop(e, d, sz);
2728 continue;
2729 }
2730
2731 /* The watch descriptor was removed by the kernel, let's drop it here too */
2732 inode_data->wd = -1;
2733 } else {
2734 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2735 if (!inode_data) {
2736 event_inotify_data_drop(e, d, sz);
2737 continue;
2738 }
2739 }
2740
2741 /* Trigger all event sources that are interested in these events. Also trigger all event
2742 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2743 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2744
2745 if (s->enabled == SD_EVENT_OFF)
2746 continue;
2747
2748 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2749 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2750 continue;
2751
2752 r = source_set_pending(s, true);
2753 if (r < 0)
2754 return r;
2755 }
2756 }
2757
2758 /* Something pending now? If so, let's finish, otherwise let's read more. */
2759 if (d->n_pending > 0)
2760 return 1;
2761 }
2762
2763 return 0;
2764 }
2765
2766 static int process_inotify(sd_event *e) {
2767 struct inotify_data *d;
2768 int r, done = 0;
2769
2770 assert(e);
2771
2772 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2773 r = event_inotify_data_process(e, d);
2774 if (r < 0)
2775 return r;
2776 if (r > 0)
2777 done++;
2778 }
2779
2780 return done;
2781 }
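/* Editor's note: an illustrative sketch (not part of sd-event.c) of the public API that feeds
 * the machinery above. Each sd_event_add_inotify() call adds one watch; sources with equal
 * priority share a single inotify fd internally. The path and handler name are hypothetical. */
#if 0
static int on_dir_event(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
        if (ev->mask & IN_Q_OVERFLOW)
                log_warning("inotify queue overflowed, a full rescan may be needed.");
        else if (ev->len > 0)
                log_info("Directory entry '%s' changed (mask=%#x).", ev->name, (unsigned) ev->mask);
        return 0;
}

/* r = sd_event_add_inotify(e, NULL, "/run/example", IN_CREATE|IN_DELETE|IN_MOVED_TO, on_dir_event, NULL); */
#endif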
2782
2783 static int source_dispatch(sd_event_source *s) {
2784 EventSourceType saved_type;
2785 int r = 0;
2786
2787 assert(s);
2788 assert(s->pending || s->type == SOURCE_EXIT);
2789
2790 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2791 * the event. */
2792 saved_type = s->type;
2793
2794 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2795 r = source_set_pending(s, false);
2796 if (r < 0)
2797 return r;
2798 }
2799
2800 if (s->type != SOURCE_POST) {
2801 sd_event_source *z;
2802 Iterator i;
2803
2804 /* If we execute a non-post source, let's mark all
2805 * post sources as pending */
2806
2807 SET_FOREACH(z, s->event->post_sources, i) {
2808 if (z->enabled == SD_EVENT_OFF)
2809 continue;
2810
2811 r = source_set_pending(z, true);
2812 if (r < 0)
2813 return r;
2814 }
2815 }
2816
2817 if (s->enabled == SD_EVENT_ONESHOT) {
2818 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2819 if (r < 0)
2820 return r;
2821 }
2822
2823 s->dispatching = true;
2824
2825 switch (s->type) {
2826
2827 case SOURCE_IO:
2828 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2829 break;
2830
2831 case SOURCE_TIME_REALTIME:
2832 case SOURCE_TIME_BOOTTIME:
2833 case SOURCE_TIME_MONOTONIC:
2834 case SOURCE_TIME_REALTIME_ALARM:
2835 case SOURCE_TIME_BOOTTIME_ALARM:
2836 r = s->time.callback(s, s->time.next, s->userdata);
2837 break;
2838
2839 case SOURCE_SIGNAL:
2840 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2841 break;
2842
2843 case SOURCE_CHILD: {
2844 bool zombie;
2845
2846 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2847
2848 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2849
2850 /* Now, reap the PID for good. */
2851 if (zombie)
2852 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2853
2854 break;
2855 }
2856
2857 case SOURCE_DEFER:
2858 r = s->defer.callback(s, s->userdata);
2859 break;
2860
2861 case SOURCE_POST:
2862 r = s->post.callback(s, s->userdata);
2863 break;
2864
2865 case SOURCE_EXIT:
2866 r = s->exit.callback(s, s->userdata);
2867 break;
2868
2869 case SOURCE_INOTIFY: {
2870 struct sd_event *e = s->event;
2871 struct inotify_data *d;
2872 size_t sz;
2873
2874 assert(s->inotify.inode_data);
2875 assert_se(d = s->inotify.inode_data->inotify_data);
2876
2877 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
2878 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2879 assert(d->buffer_filled >= sz);
2880
2881 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
2882
2883 /* If no event is pending anymore on this inotify object, let's drop the event from the
2884 * buffer. */
2885 if (d->n_pending == 0)
2886 event_inotify_data_drop(e, d, sz);
2887
2888 break;
2889 }
2890
2891 case SOURCE_WATCHDOG:
2892 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2893 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2894 assert_not_reached("Wut? I shouldn't exist.");
2895 }
2896
2897 s->dispatching = false;
2898
2899 if (r < 0)
2900 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2901 strna(s->description), event_source_type_to_string(saved_type));
2902
2903 if (s->n_ref == 0)
2904 source_free(s);
2905 else if (r < 0)
2906 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2907
2908 return 1;
2909 }
2910
2911 static int event_prepare(sd_event *e) {
2912 int r;
2913
2914 assert(e);
2915
2916 for (;;) {
2917 sd_event_source *s;
2918
2919 s = prioq_peek(e->prepare);
2920 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2921 break;
2922
2923 s->prepare_iteration = e->iteration;
2924 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2925 if (r < 0)
2926 return r;
2927
2928 assert(s->prepare);
2929
2930 s->dispatching = true;
2931 r = s->prepare(s, s->userdata);
2932 s->dispatching = false;
2933
2934 if (r < 0)
2935 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2936 strna(s->description), event_source_type_to_string(s->type));
2937
2938 if (s->n_ref == 0)
2939 source_free(s);
2940 else if (r < 0)
2941 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2942 }
2943
2944 return 0;
2945 }
2946
2947 static int dispatch_exit(sd_event *e) {
2948 sd_event_source *p;
2949 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2950 int r;
2951
2952 assert(e);
2953
2954 p = prioq_peek(e->exit);
2955 if (!p || p->enabled == SD_EVENT_OFF) {
2956 e->state = SD_EVENT_FINISHED;
2957 return 0;
2958 }
2959
2960 ref = sd_event_ref(e);
2961 e->iteration++;
2962 e->state = SD_EVENT_EXITING;
2963 r = source_dispatch(p);
2964 e->state = SD_EVENT_INITIAL;
2965 return r;
2966 }
2967
2968 static sd_event_source* event_next_pending(sd_event *e) {
2969 sd_event_source *p;
2970
2971 assert(e);
2972
2973 p = prioq_peek(e->pending);
2974 if (!p)
2975 return NULL;
2976
2977 if (p->enabled == SD_EVENT_OFF)
2978 return NULL;
2979
2980 return p;
2981 }
2982
2983 static int arm_watchdog(sd_event *e) {
2984 struct itimerspec its = {};
2985 usec_t t;
2986 int r;
2987
2988 assert(e);
2989 assert(e->watchdog_fd >= 0);
2990
2991 t = sleep_between(e,
2992 e->watchdog_last + (e->watchdog_period / 2),
2993 e->watchdog_last + (e->watchdog_period * 3 / 4));
2994
2995 timespec_store(&its.it_value, t);
2996
2997 /* Make sure we never set the watchdog to 0, which tells the
2998 * kernel to disable it. */
2999 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
3000 its.it_value.tv_nsec = 1;
3001
3002 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
3003 if (r < 0)
3004 return -errno;
3005
3006 return 0;
3007 }
3008
3009 static int process_watchdog(sd_event *e) {
3010 assert(e);
3011
3012 if (!e->watchdog)
3013 return 0;
3014
3015 /* Don't notify watchdog too often */
3016 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3017 return 0;
3018
3019 sd_notify(false, "WATCHDOG=1");
3020 e->watchdog_last = e->timestamp.monotonic;
3021
3022 return arm_watchdog(e);
3023 }
3024
3025 static void event_close_inode_data_fds(sd_event *e) {
3026 struct inode_data *d;
3027
3028 assert(e);
3029
3030 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3031 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3032 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3033 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3034 * compromise. */
3035
3036 while ((d = e->inode_data_to_close)) {
3037 assert(d->fd >= 0);
3038 d->fd = safe_close(d->fd);
3039
3040 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3041 }
3042 }
3043
3044 _public_ int sd_event_prepare(sd_event *e) {
3045 int r;
3046
3047 assert_return(e, -EINVAL);
3048 assert_return(e = event_resolve(e), -ENOPKG);
3049 assert_return(!event_pid_changed(e), -ECHILD);
3050 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3051 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3052
3053 if (e->exit_requested)
3054 goto pending;
3055
3056 e->iteration++;
3057
3058 e->state = SD_EVENT_PREPARING;
3059 r = event_prepare(e);
3060 e->state = SD_EVENT_INITIAL;
3061 if (r < 0)
3062 return r;
3063
3064 r = event_arm_timer(e, &e->realtime);
3065 if (r < 0)
3066 return r;
3067
3068 r = event_arm_timer(e, &e->boottime);
3069 if (r < 0)
3070 return r;
3071
3072 r = event_arm_timer(e, &e->monotonic);
3073 if (r < 0)
3074 return r;
3075
3076 r = event_arm_timer(e, &e->realtime_alarm);
3077 if (r < 0)
3078 return r;
3079
3080 r = event_arm_timer(e, &e->boottime_alarm);
3081 if (r < 0)
3082 return r;
3083
3084 event_close_inode_data_fds(e);
3085
3086 if (event_next_pending(e) || e->need_process_child)
3087 goto pending;
3088
3089 e->state = SD_EVENT_ARMED;
3090
3091 return 0;
3092
3093 pending:
3094 e->state = SD_EVENT_ARMED;
3095 r = sd_event_wait(e, 0);
3096 if (r == 0)
3097 e->state = SD_EVENT_ARMED;
3098
3099 return r;
3100 }
3101
3102 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3103 struct epoll_event *ev_queue;
3104 unsigned ev_queue_max;
3105 int r, m, i;
3106
3107 assert_return(e, -EINVAL);
3108 assert_return(e = event_resolve(e), -ENOPKG);
3109 assert_return(!event_pid_changed(e), -ECHILD);
3110 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3111 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3112
3113 if (e->exit_requested) {
3114 e->state = SD_EVENT_PENDING;
3115 return 1;
3116 }
3117
3118 ev_queue_max = MAX(e->n_sources, 1u);
3119 ev_queue = newa(struct epoll_event, ev_queue_max);
3120
3121 /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
3122 if (e->inotify_data_buffered)
3123 timeout = 0;
3124
3125 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3126 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3127 if (m < 0) {
3128 if (errno == EINTR) {
3129 e->state = SD_EVENT_PENDING;
3130 return 1;
3131 }
3132
3133 r = -errno;
3134 goto finish;
3135 }
3136
3137 triple_timestamp_get(&e->timestamp);
3138
3139 for (i = 0; i < m; i++) {
3140
3141 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3142 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3143 else {
3144 WakeupType *t = ev_queue[i].data.ptr;
3145
3146 switch (*t) {
3147
3148 case WAKEUP_EVENT_SOURCE:
3149 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3150 break;
3151
3152 case WAKEUP_CLOCK_DATA: {
3153 struct clock_data *d = ev_queue[i].data.ptr;
3154 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3155 break;
3156 }
3157
3158 case WAKEUP_SIGNAL_DATA:
3159 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3160 break;
3161
3162 case WAKEUP_INOTIFY_DATA:
3163 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3164 break;
3165
3166 default:
3167 assert_not_reached("Invalid wake-up pointer");
3168 }
3169 }
3170 if (r < 0)
3171 goto finish;
3172 }
3173
3174 r = process_watchdog(e);
3175 if (r < 0)
3176 goto finish;
3177
3178 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3179 if (r < 0)
3180 goto finish;
3181
3182 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3183 if (r < 0)
3184 goto finish;
3185
3186 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3187 if (r < 0)
3188 goto finish;
3189
3190 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3191 if (r < 0)
3192 goto finish;
3193
3194 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3195 if (r < 0)
3196 goto finish;
3197
3198 if (e->need_process_child) {
3199 r = process_child(e);
3200 if (r < 0)
3201 goto finish;
3202 }
3203
3204 r = process_inotify(e);
3205 if (r < 0)
3206 goto finish;
3207
3208 if (event_next_pending(e)) {
3209 e->state = SD_EVENT_PENDING;
3210
3211 return 1;
3212 }
3213
3214 r = 0;
3215
3216 finish:
3217 e->state = SD_EVENT_INITIAL;
3218
3219 return r;
3220 }
3221
3222 _public_ int sd_event_dispatch(sd_event *e) {
3223 sd_event_source *p;
3224 int r;
3225
3226 assert_return(e, -EINVAL);
3227 assert_return(e = event_resolve(e), -ENOPKG);
3228 assert_return(!event_pid_changed(e), -ECHILD);
3229 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3230 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3231
3232 if (e->exit_requested)
3233 return dispatch_exit(e);
3234
3235 p = event_next_pending(e);
3236 if (p) {
3237 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3238
3239 ref = sd_event_ref(e);
3240 e->state = SD_EVENT_RUNNING;
3241 r = source_dispatch(p);
3242 e->state = SD_EVENT_INITIAL;
3243 return r;
3244 }
3245
3246 e->state = SD_EVENT_INITIAL;
3247
3248 return 1;
3249 }
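/* Editor's note: an illustrative sketch (not part of sd-event.c). The prepare/wait/dispatch split
 * exists so that the loop can be embedded into a foreign poll loop: watch sd_event_get_fd() for
 * readability there, then run one iteration by hand. The function name is hypothetical and error
 * handling is abbreviated. */
#if 0
static int run_one_iteration_from_foreign_loop(sd_event *e) {
        int r;

        r = sd_event_prepare(e);          /* > 0 means something is pending already */
        if (r == 0)
                r = sd_event_wait(e, 0);  /* the foreign loop told us the fd is ready, so don't block */
        if (r > 0)
                r = sd_event_dispatch(e); /* run exactly one callback */

        return r;
}
#endif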
3250
3251 static void event_log_delays(sd_event *e) {
3252 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1], *p;
3253 size_t l, i;
3254
3255 p = b;
3256 l = sizeof(b);
3257 for (i = 0; i < ELEMENTSOF(e->delays); i++) {
3258 l = strpcpyf(&p, l, "%u ", e->delays[i]);
3259 e->delays[i] = 0;
3260 }
3261 log_debug("Event loop iterations: %s", b);
3262 }
3263
3264 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3265 int r;
3266
3267 assert_return(e, -EINVAL);
3268 assert_return(e = event_resolve(e), -ENOPKG);
3269 assert_return(!event_pid_changed(e), -ECHILD);
3270 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3271 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3272
3273 if (e->profile_delays && e->last_run) {
3274 usec_t this_run;
3275 unsigned l;
3276
3277 this_run = now(CLOCK_MONOTONIC);
3278
3279 l = u64log2(this_run - e->last_run);
3280 assert(l < sizeof(e->delays));
3281 e->delays[l]++;
3282
3283 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3284 event_log_delays(e);
3285 e->last_log = this_run;
3286 }
3287 }
3288
3289 r = sd_event_prepare(e);
3290 if (r == 0)
3291 /* There was nothing? Then wait... */
3292 r = sd_event_wait(e, timeout);
3293
3294 if (e->profile_delays)
3295 e->last_run = now(CLOCK_MONOTONIC);
3296
3297 if (r > 0) {
3298 /* There's something now, then let's dispatch it */
3299 r = sd_event_dispatch(e);
3300 if (r < 0)
3301 return r;
3302
3303 return 1;
3304 }
3305
3306 return r;
3307 }
3308
3309 _public_ int sd_event_loop(sd_event *e) {
3310 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3311 int r;
3312
3313 assert_return(e, -EINVAL);
3314 assert_return(e = event_resolve(e), -ENOPKG);
3315 assert_return(!event_pid_changed(e), -ECHILD);
3316 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3317
3318 ref = sd_event_ref(e);
3319
3320 while (e->state != SD_EVENT_FINISHED) {
3321 r = sd_event_run(e, (uint64_t) -1);
3322 if (r < 0)
3323 return r;
3324 }
3325
3326 return e->exit_code;
3327 }
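/* Editor's note: a minimal, self-contained usage sketch (not part of sd-event.c) tying the public
 * entry points together: sd_event_default(), one timer source, sd_event_loop() until a handler calls
 * sd_event_exit(). As a standalone program it would look roughly like this; the handler name and the
 * one-second delay are hypothetical. */
#if 0
#include <systemd/sd-event.h>

static int on_timeout(sd_event_source *s, uint64_t usec, void *userdata) {
        /* Stop the loop; sd_event_loop() returns the exit code passed here. */
        return sd_event_exit(sd_event_source_get_event(s), 0);
}

int main(void) {
        sd_event *e = NULL;
        uint64_t now_usec;
        int r;

        r = sd_event_default(&e);
        if (r < 0)
                return 1;

        (void) sd_event_now(e, CLOCK_MONOTONIC, &now_usec);

        /* Passing NULL as the source pointer makes the source "floating", i.e. owned by the loop. */
        r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC, now_usec + 1000000, 0, on_timeout, NULL);
        if (r < 0)
                return 1;

        r = sd_event_loop(e);
        sd_event_unref(e);
        return r < 0;
}
#endif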
3328
3329 _public_ int sd_event_get_fd(sd_event *e) {
3330
3331 assert_return(e, -EINVAL);
3332 assert_return(e = event_resolve(e), -ENOPKG);
3333 assert_return(!event_pid_changed(e), -ECHILD);
3334
3335 return e->epoll_fd;
3336 }
3337
3338 _public_ int sd_event_get_state(sd_event *e) {
3339 assert_return(e, -EINVAL);
3340 assert_return(e = event_resolve(e), -ENOPKG);
3341 assert_return(!event_pid_changed(e), -ECHILD);
3342
3343 return e->state;
3344 }
3345
3346 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3347 assert_return(e, -EINVAL);
3348 assert_return(e = event_resolve(e), -ENOPKG);
3349 assert_return(code, -EINVAL);
3350 assert_return(!event_pid_changed(e), -ECHILD);
3351
3352 if (!e->exit_requested)
3353 return -ENODATA;
3354
3355 *code = e->exit_code;
3356 return 0;
3357 }
3358
3359 _public_ int sd_event_exit(sd_event *e, int code) {
3360 assert_return(e, -EINVAL);
3361 assert_return(e = event_resolve(e), -ENOPKG);
3362 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3363 assert_return(!event_pid_changed(e), -ECHILD);
3364
3365 e->exit_requested = true;
3366 e->exit_code = code;
3367
3368 return 0;
3369 }
3370
3371 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3372 assert_return(e, -EINVAL);
3373 assert_return(e = event_resolve(e), -ENOPKG);
3374 assert_return(usec, -EINVAL);
3375 assert_return(!event_pid_changed(e), -ECHILD);
3376
3377 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3378 return -EOPNOTSUPP;
3379
3380 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3381 * for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not, but for
3382 * the purpose of getting the time this doesn't matter. */
3383 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3384 return -EOPNOTSUPP;
3385
3386 if (!triple_timestamp_is_set(&e->timestamp)) {
3387 /* Implicitly fall back to now() if we never ran
3388 * before and thus have no cached time. */
3389 *usec = now(clock);
3390 return 1;
3391 }
3392
3393 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3394 return 0;
3395 }
3396
3397 _public_ int sd_event_default(sd_event **ret) {
3398 sd_event *e = NULL;
3399 int r;
3400
3401 if (!ret)
3402 return !!default_event;
3403
3404 if (default_event) {
3405 *ret = sd_event_ref(default_event);
3406 return 0;
3407 }
3408
3409 r = sd_event_new(&e);
3410 if (r < 0)
3411 return r;
3412
3413 e->default_event_ptr = &default_event;
3414 e->tid = gettid();
3415 default_event = e;
3416
3417 *ret = e;
3418 return 1;
3419 }
3420
3421 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3422 assert_return(e, -EINVAL);
3423 assert_return(e = event_resolve(e), -ENOPKG);
3424 assert_return(tid, -EINVAL);
3425 assert_return(!event_pid_changed(e), -ECHILD);
3426
3427 if (e->tid != 0) {
3428 *tid = e->tid;
3429 return 0;
3430 }
3431
3432 return -ENXIO;
3433 }
3434
3435 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3436 int r;
3437
3438 assert_return(e, -EINVAL);
3439 assert_return(e = event_resolve(e), -ENOPKG);
3440 assert_return(!event_pid_changed(e), -ECHILD);
3441
3442 if (e->watchdog == !!b)
3443 return e->watchdog;
3444
3445 if (b) {
3446 struct epoll_event ev;
3447
3448 r = sd_watchdog_enabled(false, &e->watchdog_period);
3449 if (r <= 0)
3450 return r;
3451
3452 /* Issue first ping immediately */
3453 sd_notify(false, "WATCHDOG=1");
3454 e->watchdog_last = now(CLOCK_MONOTONIC);
3455
3456 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3457 if (e->watchdog_fd < 0)
3458 return -errno;
3459
3460 r = arm_watchdog(e);
3461 if (r < 0)
3462 goto fail;
3463
3464 ev = (struct epoll_event) {
3465 .events = EPOLLIN,
3466 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3467 };
3468
3469 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3470 if (r < 0) {
3471 r = -errno;
3472 goto fail;
3473 }
3474
3475 } else {
3476 if (e->watchdog_fd >= 0) {
3477 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3478 e->watchdog_fd = safe_close(e->watchdog_fd);
3479 }
3480 }
3481
3482 e->watchdog = !!b;
3483 return e->watchdog;
3484
3485 fail:
3486 e->watchdog_fd = safe_close(e->watchdog_fd);
3487 return r;
3488 }
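/* Editor's note: an illustrative sketch (not part of sd-event.c). With WatchdogSec= set in the unit
 * file the service manager passes WATCHDOG_USEC to the service; enabling the loop's watchdog then makes
 * it send "WATCHDOG=1" pings automatically, between 1/2 and 3/4 of the period (see arm_watchdog()). The
 * helper name is hypothetical. */
#if 0
static int setup_watchdog(sd_event *e) {
        int r;

        r = sd_event_set_watchdog(e, true);
        if (r < 0)
                log_warning_errno(r, "Failed to enable watchdog support, ignoring: %m");
        else if (r == 0)
                log_debug("Watchdog not requested by service manager, not pinging.");

        return 0;
}
#endif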
3489
3490 _public_ int sd_event_get_watchdog(sd_event *e) {
3491 assert_return(e, -EINVAL);
3492 assert_return(e = event_resolve(e), -ENOPKG);
3493 assert_return(!event_pid_changed(e), -ECHILD);
3494
3495 return e->watchdog;
3496 }
3497
3498 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3499 assert_return(e, -EINVAL);
3500 assert_return(e = event_resolve(e), -ENOPKG);
3501 assert_return(!event_pid_changed(e), -ECHILD);
3502
3503 *ret = e->iteration;
3504 return 0;
3505 }
3506
3507 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3508 assert_return(s, -EINVAL);
3509
3510 s->destroy_callback = callback;
3511 return 0;
3512 }
3513
3514 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3515 assert_return(s, -EINVAL);
3516
3517 if (ret)
3518 *ret = s->destroy_callback;
3519
3520 return !!s->destroy_callback;
3521 }
3522
3523 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3524 assert_return(s, -EINVAL);
3525
3526 return s->floating;
3527 }
3528
3529 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3530 assert_return(s, -EINVAL);
3531
3532 if (s->floating == !!b)
3533 return 0;
3534
3535 if (!s->event) /* Already disconnected */
3536 return -ESTALE;
3537
3538 s->floating = b;
3539
3540 if (b) {
3541 sd_event_source_ref(s);
3542 sd_event_unref(s->event);
3543 } else {
3544 sd_event_ref(s->event);
3545 sd_event_source_unref(s);
3546 }
3547
3548 return 1;
3549 }