1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <sys/epoll.h>
4 #include <sys/timerfd.h>
5 #include <sys/wait.h>
6
7 #include "sd-daemon.h"
8 #include "sd-event.h"
9 #include "sd-id128.h"
10
11 #include "alloc-util.h"
12 #include "event-source.h"
13 #include "fd-util.h"
14 #include "fs-util.h"
15 #include "hashmap.h"
16 #include "list.h"
17 #include "macro.h"
18 #include "memory-util.h"
19 #include "missing.h"
20 #include "prioq.h"
21 #include "process-util.h"
22 #include "set.h"
23 #include "signal-util.h"
24 #include "string-table.h"
25 #include "string-util.h"
26 #include "time-util.h"
27
28 #define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)
29
30 static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
31 [SOURCE_IO] = "io",
32 [SOURCE_TIME_REALTIME] = "realtime",
33 [SOURCE_TIME_BOOTTIME] = "boottime",
34 [SOURCE_TIME_MONOTONIC] = "monotonic",
35 [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
36 [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
37 [SOURCE_SIGNAL] = "signal",
38 [SOURCE_CHILD] = "child",
39 [SOURCE_DEFER] = "defer",
40 [SOURCE_POST] = "post",
41 [SOURCE_EXIT] = "exit",
42 [SOURCE_WATCHDOG] = "watchdog",
43 [SOURCE_INOTIFY] = "inotify",
44 };
45
46 DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);
47
48 #define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)
49
50 struct sd_event {
51 unsigned n_ref;
52
53 int epoll_fd;
54 int watchdog_fd;
55
56 Prioq *pending;
57 Prioq *prepare;
58
59 /* timerfd_create() only supports these five clocks so far. We
60 * can add support for more clocks when the kernel learns to
61 * deal with them, too. */
62 struct clock_data realtime;
63 struct clock_data boottime;
64 struct clock_data monotonic;
65 struct clock_data realtime_alarm;
66 struct clock_data boottime_alarm;
67
68 usec_t perturb;
69
70 sd_event_source **signal_sources; /* indexed by signal number */
71 Hashmap *signal_data; /* indexed by priority */
72
73 Hashmap *child_sources;
74 unsigned n_enabled_child_sources;
75
76 Set *post_sources;
77
78 Prioq *exit;
79
80 Hashmap *inotify_data; /* indexed by priority */
81
82 /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
83 LIST_HEAD(struct inode_data, inode_data_to_close);
84
85 /* A list of inotify objects that already have events buffered which aren't processed yet */
86 LIST_HEAD(struct inotify_data, inotify_data_buffered);
87
88 pid_t original_pid;
89
90 uint64_t iteration;
91 triple_timestamp timestamp;
92 int state;
93
94 bool exit_requested:1;
95 bool need_process_child:1;
96 bool watchdog:1;
97 bool profile_delays:1;
98
99 int exit_code;
100
101 pid_t tid;
102 sd_event **default_event_ptr;
103
104 usec_t watchdog_last, watchdog_period;
105
106 unsigned n_sources;
107
108 LIST_HEAD(sd_event_source, sources);
109
110 usec_t last_run, last_log;
111 unsigned delays[sizeof(usec_t) * 8];
112 };
113
114 static thread_local sd_event *default_event = NULL;
115
116 static void source_disconnect(sd_event_source *s);
117 static void event_gc_inode_data(sd_event *e, struct inode_data *d);
118
119 static sd_event *event_resolve(sd_event *e) {
120 return e == SD_EVENT_DEFAULT ? default_event : e;
121 }
122
123 static int pending_prioq_compare(const void *a, const void *b) {
124 const sd_event_source *x = a, *y = b;
125 int r;
126
127 assert(x->pending);
128 assert(y->pending);
129
130 /* Enabled ones first */
131 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
132 return -1;
133 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
134 return 1;
135
136 /* Lower priority values first */
137 r = CMP(x->priority, y->priority);
138 if (r != 0)
139 return r;
140
141 /* Older entries first */
142 return CMP(x->pending_iteration, y->pending_iteration);
143 }
144
145 static int prepare_prioq_compare(const void *a, const void *b) {
146 const sd_event_source *x = a, *y = b;
147 int r;
148
149 assert(x->prepare);
150 assert(y->prepare);
151
152 /* Enabled ones first */
153 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
154 return -1;
155 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
156 return 1;
157
158 /* Move most recently prepared ones last, so that we can stop
159 * preparing as soon as we hit one that has already been
160 * prepared in the current iteration */
161 r = CMP(x->prepare_iteration, y->prepare_iteration);
162 if (r != 0)
163 return r;
164
165 /* Lower priority values first */
166 return CMP(x->priority, y->priority);
167 }
168
169 static int earliest_time_prioq_compare(const void *a, const void *b) {
170 const sd_event_source *x = a, *y = b;
171
172 assert(EVENT_SOURCE_IS_TIME(x->type));
173 assert(x->type == y->type);
174
175 /* Enabled ones first */
176 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
177 return -1;
178 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
179 return 1;
180
181 /* Move the pending ones to the end */
182 if (!x->pending && y->pending)
183 return -1;
184 if (x->pending && !y->pending)
185 return 1;
186
187 /* Order by time */
188 return CMP(x->time.next, y->time.next);
189 }
190
191 static usec_t time_event_source_latest(const sd_event_source *s) {
192 return usec_add(s->time.next, s->time.accuracy);
193 }
194
195 static int latest_time_prioq_compare(const void *a, const void *b) {
196 const sd_event_source *x = a, *y = b;
197
198 assert(EVENT_SOURCE_IS_TIME(x->type));
199 assert(x->type == y->type);
200
201 /* Enabled ones first */
202 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
203 return -1;
204 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
205 return 1;
206
207 /* Move the pending ones to the end */
208 if (!x->pending && y->pending)
209 return -1;
210 if (x->pending && !y->pending)
211 return 1;
212
213 /* Order by time */
214 return CMP(time_event_source_latest(x), time_event_source_latest(y));
215 }
216
217 static int exit_prioq_compare(const void *a, const void *b) {
218 const sd_event_source *x = a, *y = b;
219
220 assert(x->type == SOURCE_EXIT);
221 assert(y->type == SOURCE_EXIT);
222
223 /* Enabled ones first */
224 if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
225 return -1;
226 if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
227 return 1;
228
229 /* Lower priority values first */
230 return CMP(x->priority, y->priority);
231 }
232
233 static void free_clock_data(struct clock_data *d) {
234 assert(d);
235 assert(d->wakeup == WAKEUP_CLOCK_DATA);
236
237 safe_close(d->fd);
238 prioq_free(d->earliest);
239 prioq_free(d->latest);
240 }
241
242 static sd_event *event_free(sd_event *e) {
243 sd_event_source *s;
244
245 assert(e);
246
247 while ((s = e->sources)) {
248 assert(s->floating);
249 source_disconnect(s);
250 sd_event_source_unref(s);
251 }
252
253 assert(e->n_sources == 0);
254
255 if (e->default_event_ptr)
256 *(e->default_event_ptr) = NULL;
257
258 safe_close(e->epoll_fd);
259 safe_close(e->watchdog_fd);
260
261 free_clock_data(&e->realtime);
262 free_clock_data(&e->boottime);
263 free_clock_data(&e->monotonic);
264 free_clock_data(&e->realtime_alarm);
265 free_clock_data(&e->boottime_alarm);
266
267 prioq_free(e->pending);
268 prioq_free(e->prepare);
269 prioq_free(e->exit);
270
271 free(e->signal_sources);
272 hashmap_free(e->signal_data);
273
274 hashmap_free(e->inotify_data);
275
276 hashmap_free(e->child_sources);
277 set_free(e->post_sources);
278
279 return mfree(e);
280 }
281
282 _public_ int sd_event_new(sd_event** ret) {
283 sd_event *e;
284 int r;
285
286 assert_return(ret, -EINVAL);
287
288 e = new(sd_event, 1);
289 if (!e)
290 return -ENOMEM;
291
292 *e = (sd_event) {
293 .n_ref = 1,
294 .epoll_fd = -1,
295 .watchdog_fd = -1,
296 .realtime.wakeup = WAKEUP_CLOCK_DATA,
297 .realtime.fd = -1,
298 .realtime.next = USEC_INFINITY,
299 .boottime.wakeup = WAKEUP_CLOCK_DATA,
300 .boottime.fd = -1,
301 .boottime.next = USEC_INFINITY,
302 .monotonic.wakeup = WAKEUP_CLOCK_DATA,
303 .monotonic.fd = -1,
304 .monotonic.next = USEC_INFINITY,
305 .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
306 .realtime_alarm.fd = -1,
307 .realtime_alarm.next = USEC_INFINITY,
308 .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
309 .boottime_alarm.fd = -1,
310 .boottime_alarm.next = USEC_INFINITY,
311 .perturb = USEC_INFINITY,
312 .original_pid = getpid_cached(),
313 };
314
315 r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
316 if (r < 0)
317 goto fail;
318
319 e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
320 if (e->epoll_fd < 0) {
321 r = -errno;
322 goto fail;
323 }
324
325 e->epoll_fd = fd_move_above_stdio(e->epoll_fd);
326
327 if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
328 log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
329 e->profile_delays = true;
330 }
331
332 *ret = e;
333 return 0;
334
335 fail:
336 event_free(e);
337 return r;
338 }
339
340 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);
341
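/* Illustrative usage (editor's sketch, not part of the original source): allocate a loop, run it
 * until something calls sd_event_exit(), then drop the reference again.
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;
 *
 *     r = sd_event_loop(e);   // dispatches sources until sd_event_exit() is called
 *     sd_event_unref(e);      // frees the loop once the last reference is gone
 *     return r;
 *
 * Most callers use sd_event_default() instead, which caches one loop per thread via the
 * default_event pointer declared above. */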
342 static bool event_pid_changed(sd_event *e) {
343 assert(e);
344
345 /* We don't support people creating an event loop and keeping
346 * it around over a fork(). Let's complain. */
347
348 return e->original_pid != getpid_cached();
349 }
350
351 static void source_io_unregister(sd_event_source *s) {
352 int r;
353
354 assert(s);
355 assert(s->type == SOURCE_IO);
356
357 if (event_pid_changed(s->event))
358 return;
359
360 if (!s->io.registered)
361 return;
362
363 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
364 if (r < 0)
365 log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
366 strna(s->description), event_source_type_to_string(s->type));
367
368 s->io.registered = false;
369 }
370
371 static int source_io_register(
372 sd_event_source *s,
373 int enabled,
374 uint32_t events) {
375
376 struct epoll_event ev;
377 int r;
378
379 assert(s);
380 assert(s->type == SOURCE_IO);
381 assert(enabled != SD_EVENT_OFF);
382
383 ev = (struct epoll_event) {
384 .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
385 .data.ptr = s,
386 };
387
388 if (s->io.registered)
389 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
390 else
391 r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
392 if (r < 0)
393 return -errno;
394
395 s->io.registered = true;
396
397 return 0;
398 }
399
400 static clockid_t event_source_type_to_clock(EventSourceType t) {
401
402 switch (t) {
403
404 case SOURCE_TIME_REALTIME:
405 return CLOCK_REALTIME;
406
407 case SOURCE_TIME_BOOTTIME:
408 return CLOCK_BOOTTIME;
409
410 case SOURCE_TIME_MONOTONIC:
411 return CLOCK_MONOTONIC;
412
413 case SOURCE_TIME_REALTIME_ALARM:
414 return CLOCK_REALTIME_ALARM;
415
416 case SOURCE_TIME_BOOTTIME_ALARM:
417 return CLOCK_BOOTTIME_ALARM;
418
419 default:
420 return (clockid_t) -1;
421 }
422 }
423
424 static EventSourceType clock_to_event_source_type(clockid_t clock) {
425
426 switch (clock) {
427
428 case CLOCK_REALTIME:
429 return SOURCE_TIME_REALTIME;
430
431 case CLOCK_BOOTTIME:
432 return SOURCE_TIME_BOOTTIME;
433
434 case CLOCK_MONOTONIC:
435 return SOURCE_TIME_MONOTONIC;
436
437 case CLOCK_REALTIME_ALARM:
438 return SOURCE_TIME_REALTIME_ALARM;
439
440 case CLOCK_BOOTTIME_ALARM:
441 return SOURCE_TIME_BOOTTIME_ALARM;
442
443 default:
444 return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
445 }
446 }
447
448 static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
449 assert(e);
450
451 switch (t) {
452
453 case SOURCE_TIME_REALTIME:
454 return &e->realtime;
455
456 case SOURCE_TIME_BOOTTIME:
457 return &e->boottime;
458
459 case SOURCE_TIME_MONOTONIC:
460 return &e->monotonic;
461
462 case SOURCE_TIME_REALTIME_ALARM:
463 return &e->realtime_alarm;
464
465 case SOURCE_TIME_BOOTTIME_ALARM:
466 return &e->boottime_alarm;
467
468 default:
469 return NULL;
470 }
471 }
472
473 static void event_free_signal_data(sd_event *e, struct signal_data *d) {
474 assert(e);
475
476 if (!d)
477 return;
478
479 hashmap_remove(e->signal_data, &d->priority);
480 safe_close(d->fd);
481 free(d);
482 }
483
484 static int event_make_signal_data(
485 sd_event *e,
486 int sig,
487 struct signal_data **ret) {
488
489 struct epoll_event ev;
490 struct signal_data *d;
491 bool added = false;
492 sigset_t ss_copy;
493 int64_t priority;
494 int r;
495
496 assert(e);
497
498 if (event_pid_changed(e))
499 return -ECHILD;
500
501 if (e->signal_sources && e->signal_sources[sig])
502 priority = e->signal_sources[sig]->priority;
503 else
504 priority = SD_EVENT_PRIORITY_NORMAL;
505
506 d = hashmap_get(e->signal_data, &priority);
507 if (d) {
508 if (sigismember(&d->sigset, sig) > 0) {
509 if (ret)
510 *ret = d;
511 return 0;
512 }
513 } else {
514 r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
515 if (r < 0)
516 return r;
517
518 d = new(struct signal_data, 1);
519 if (!d)
520 return -ENOMEM;
521
522 *d = (struct signal_data) {
523 .wakeup = WAKEUP_SIGNAL_DATA,
524 .fd = -1,
525 .priority = priority,
526 };
527
528 r = hashmap_put(e->signal_data, &d->priority, d);
529 if (r < 0) {
530 free(d);
531 return r;
532 }
533
534 added = true;
535 }
536
537 ss_copy = d->sigset;
538 assert_se(sigaddset(&ss_copy, sig) >= 0);
539
540 r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
541 if (r < 0) {
542 r = -errno;
543 goto fail;
544 }
545
546 d->sigset = ss_copy;
547
548 if (d->fd >= 0) {
549 if (ret)
550 *ret = d;
551 return 0;
552 }
553
554 d->fd = fd_move_above_stdio(r);
555
556 ev = (struct epoll_event) {
557 .events = EPOLLIN,
558 .data.ptr = d,
559 };
560
561 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
562 if (r < 0) {
563 r = -errno;
564 goto fail;
565 }
566
567 if (ret)
568 *ret = d;
569
570 return 0;
571
572 fail:
573 if (added)
574 event_free_signal_data(e, d);
575
576 return r;
577 }
578
579 static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
580 assert(e);
581 assert(d);
582
583 /* Turns off the specified signal in the signal data
584 * object. If the signal mask of the object becomes empty that
585 * way, the object is removed entirely. */
586
587 if (sigismember(&d->sigset, sig) == 0)
588 return;
589
590 assert_se(sigdelset(&d->sigset, sig) >= 0);
591
592 if (sigisemptyset(&d->sigset)) {
593 /* If the mask is now empty we can get rid of the structure */
594 event_free_signal_data(e, d);
595 return;
596 }
597
598 assert(d->fd >= 0);
599
600 if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
601 log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
602 }
603
604 static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
605 struct signal_data *d;
606 static const int64_t zero_priority = 0;
607
608 assert(e);
609
610 /* Rechecks if the specified signal is still something we are
611 * interested in. If not, we'll unmask it, and possibly drop
612 * the signalfd for it. */
613
614 if (sig == SIGCHLD &&
615 e->n_enabled_child_sources > 0)
616 return;
617
618 if (e->signal_sources &&
619 e->signal_sources[sig] &&
620 e->signal_sources[sig]->enabled != SD_EVENT_OFF)
621 return;
622
623 /*
624 * The specified signal might be enabled in three different queues:
625 *
626 * 1) the one that belongs to the priority passed (if it is non-NULL)
627 * 2) the one that belongs to the priority of the event source of the signal (if there is one)
628 * 3) the 0 priority (to cover the SIGCHLD case)
629 *
630 * Hence, let's remove it from all three here.
631 */
632
633 if (priority) {
634 d = hashmap_get(e->signal_data, priority);
635 if (d)
636 event_unmask_signal_data(e, d, sig);
637 }
638
639 if (e->signal_sources && e->signal_sources[sig]) {
640 d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
641 if (d)
642 event_unmask_signal_data(e, d, sig);
643 }
644
645 d = hashmap_get(e->signal_data, &zero_priority);
646 if (d)
647 event_unmask_signal_data(e, d, sig);
648 }
649
650 static void source_disconnect(sd_event_source *s) {
651 sd_event *event;
652
653 assert(s);
654
655 if (!s->event)
656 return;
657
658 assert(s->event->n_sources > 0);
659
660 switch (s->type) {
661
662 case SOURCE_IO:
663 if (s->io.fd >= 0)
664 source_io_unregister(s);
665
666 break;
667
668 case SOURCE_TIME_REALTIME:
669 case SOURCE_TIME_BOOTTIME:
670 case SOURCE_TIME_MONOTONIC:
671 case SOURCE_TIME_REALTIME_ALARM:
672 case SOURCE_TIME_BOOTTIME_ALARM: {
673 struct clock_data *d;
674
675 d = event_get_clock_data(s->event, s->type);
676 assert(d);
677
678 prioq_remove(d->earliest, s, &s->time.earliest_index);
679 prioq_remove(d->latest, s, &s->time.latest_index);
680 d->needs_rearm = true;
681 break;
682 }
683
684 case SOURCE_SIGNAL:
685 if (s->signal.sig > 0) {
686
687 if (s->event->signal_sources)
688 s->event->signal_sources[s->signal.sig] = NULL;
689
690 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
691 }
692
693 break;
694
695 case SOURCE_CHILD:
696 if (s->child.pid > 0) {
697 if (s->enabled != SD_EVENT_OFF) {
698 assert(s->event->n_enabled_child_sources > 0);
699 s->event->n_enabled_child_sources--;
700 }
701
702 (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
703 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
704 }
705
706 break;
707
708 case SOURCE_DEFER:
709 /* nothing */
710 break;
711
712 case SOURCE_POST:
713 set_remove(s->event->post_sources, s);
714 break;
715
716 case SOURCE_EXIT:
717 prioq_remove(s->event->exit, s, &s->exit.prioq_index);
718 break;
719
720 case SOURCE_INOTIFY: {
721 struct inode_data *inode_data;
722
723 inode_data = s->inotify.inode_data;
724 if (inode_data) {
725 struct inotify_data *inotify_data;
726 assert_se(inotify_data = inode_data->inotify_data);
727
728 /* Detach this event source from the inode object */
729 LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
730 s->inotify.inode_data = NULL;
731
732 if (s->pending) {
733 assert(inotify_data->n_pending > 0);
734 inotify_data->n_pending--;
735 }
736
737 /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
738 * continues to be watched. That's because inotify doesn't really have an API for that: we
739 * can only change watch masks with access to the original inode either by fd or by path. But
740 * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
741 * continuously and keeping the mount busy which we can't really do. We could reconstruct the
742 * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
743 * there), but given the need for open_by_handle_at() which is privileged and not universally
744 * available this would be quite an incomplete solution. Hence we go the other way, leave the
745 * mask set, even if it is not minimized now, and ignore all events we aren't interested in
746 * anymore after reception. Yes, this sucks, but … Linux … */
747
748 /* Maybe release the inode data (and its inotify) */
749 event_gc_inode_data(s->event, inode_data);
750 }
751
752 break;
753 }
754
755 default:
756 assert_not_reached("Wut? I shouldn't exist.");
757 }
758
759 if (s->pending)
760 prioq_remove(s->event->pending, s, &s->pending_index);
761
762 if (s->prepare)
763 prioq_remove(s->event->prepare, s, &s->prepare_index);
764
765 event = s->event;
766
767 s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
768 s->event = NULL;
769 LIST_REMOVE(sources, event->sources, s);
770 event->n_sources--;
771
772 if (!s->floating)
773 sd_event_unref(event);
774 }
775
776 static void source_free(sd_event_source *s) {
777 assert(s);
778
779 source_disconnect(s);
780
781 if (s->type == SOURCE_IO && s->io.owned)
782 s->io.fd = safe_close(s->io.fd);
783
784 if (s->destroy_callback)
785 s->destroy_callback(s->userdata);
786
787 free(s->description);
788 free(s);
789 }
790 DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);
791
792 static int source_set_pending(sd_event_source *s, bool b) {
793 int r;
794
795 assert(s);
796 assert(s->type != SOURCE_EXIT);
797
798 if (s->pending == b)
799 return 0;
800
801 s->pending = b;
802
803 if (b) {
804 s->pending_iteration = s->event->iteration;
805
806 r = prioq_put(s->event->pending, s, &s->pending_index);
807 if (r < 0) {
808 s->pending = false;
809 return r;
810 }
811 } else
812 assert_se(prioq_remove(s->event->pending, s, &s->pending_index));
813
814 if (EVENT_SOURCE_IS_TIME(s->type)) {
815 struct clock_data *d;
816
817 d = event_get_clock_data(s->event, s->type);
818 assert(d);
819
820 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
821 prioq_reshuffle(d->latest, s, &s->time.latest_index);
822 d->needs_rearm = true;
823 }
824
825 if (s->type == SOURCE_SIGNAL && !b) {
826 struct signal_data *d;
827
828 d = hashmap_get(s->event->signal_data, &s->priority);
829 if (d && d->current == s)
830 d->current = NULL;
831 }
832
833 if (s->type == SOURCE_INOTIFY) {
834
835 assert(s->inotify.inode_data);
836 assert(s->inotify.inode_data->inotify_data);
837
838 if (b)
839 s->inotify.inode_data->inotify_data->n_pending++;
840 else {
841 assert(s->inotify.inode_data->inotify_data->n_pending > 0);
842 s->inotify.inode_data->inotify_data->n_pending--;
843 }
844 }
845
846 return 0;
847 }
848
849 static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
850 sd_event_source *s;
851
852 assert(e);
853
854 s = new(sd_event_source, 1);
855 if (!s)
856 return NULL;
857
858 *s = (struct sd_event_source) {
859 .n_ref = 1,
860 .event = e,
861 .floating = floating,
862 .type = type,
863 .pending_index = PRIOQ_IDX_NULL,
864 .prepare_index = PRIOQ_IDX_NULL,
865 };
866
867 if (!floating)
868 sd_event_ref(e);
869
870 LIST_PREPEND(sources, e->sources, s);
871 e->n_sources++;
872
873 return s;
874 }
875
876 _public_ int sd_event_add_io(
877 sd_event *e,
878 sd_event_source **ret,
879 int fd,
880 uint32_t events,
881 sd_event_io_handler_t callback,
882 void *userdata) {
883
884 _cleanup_(source_freep) sd_event_source *s = NULL;
885 int r;
886
887 assert_return(e, -EINVAL);
888 assert_return(e = event_resolve(e), -ENOPKG);
889 assert_return(fd >= 0, -EBADF);
890 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
891 assert_return(callback, -EINVAL);
892 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
893 assert_return(!event_pid_changed(e), -ECHILD);
894
895 s = source_new(e, !ret, SOURCE_IO);
896 if (!s)
897 return -ENOMEM;
898
899 s->wakeup = WAKEUP_EVENT_SOURCE;
900 s->io.fd = fd;
901 s->io.events = events;
902 s->io.callback = callback;
903 s->userdata = userdata;
904 s->enabled = SD_EVENT_ON;
905
906 r = source_io_register(s, s->enabled, events);
907 if (r < 0)
908 return r;
909
910 if (ret)
911 *ret = s;
912 TAKE_PTR(s);
913
914 return 0;
915 }
916
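/* Illustrative usage (editor's sketch, not part of the original source): registering an I/O source
 * for a readable fd. "on_io", "e" and "my_fd" are example names; the handler signature follows
 * sd_event_io_handler_t as used above.
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[4096];
 *             ssize_t n;
 *
 *             n = read(fd, buf, sizeof(buf));
 *             if (n <= 0)
 *                     return sd_event_exit(sd_event_source_get_event(s), n < 0 ? -errno : 0);
 *
 *             return 0;   // keep the source enabled, it was added with SD_EVENT_ON
 *     }
 *
 *     r = sd_event_add_io(e, NULL, my_fd, EPOLLIN, on_io, NULL);
 *
 * Passing NULL as "ret" makes the source floating, i.e. it is owned by the event loop itself, as
 * implemented by source_new(e, !ret, ...) above. */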
917 static void initialize_perturb(sd_event *e) {
918 sd_id128_t bootid = {};
919
920 /* When we sleep for longer, we try to realign the wakeup to
921 the same time within each minute/second/250ms, so that
922 events all across the system can be coalesced into a single
923 CPU wakeup. However, let's take some system-specific
924 randomness for this value, so that in a network of systems
925 with synced clocks timer events are distributed a
926 bit. Here, we calculate a perturbation usec offset from the
927 boot ID. */
928
929 if (_likely_(e->perturb != USEC_INFINITY))
930 return;
931
932 if (sd_id128_get_boot(&bootid) >= 0)
933 e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
934 }
935
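/* Editor's note: e->perturb ends up as a per-boot constant in the range [0, USEC_PER_MINUTE),
 * derived from the boot ID above. sleep_between() further down calls initialize_perturb() and uses
 * the offset when a wakeup window has slack, so that machines with synchronized clocks spread
 * their timer wakeups instead of all firing at the exact same instant. */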
936 static int event_setup_timer_fd(
937 sd_event *e,
938 struct clock_data *d,
939 clockid_t clock) {
940
941 struct epoll_event ev;
942 int r, fd;
943
944 assert(e);
945 assert(d);
946
947 if (_likely_(d->fd >= 0))
948 return 0;
949
950 fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
951 if (fd < 0)
952 return -errno;
953
954 fd = fd_move_above_stdio(fd);
955
956 ev = (struct epoll_event) {
957 .events = EPOLLIN,
958 .data.ptr = d,
959 };
960
961 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
962 if (r < 0) {
963 safe_close(fd);
964 return -errno;
965 }
966
967 d->fd = fd;
968 return 0;
969 }
970
971 static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
972 assert(s);
973
974 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
975 }
976
977 _public_ int sd_event_add_time(
978 sd_event *e,
979 sd_event_source **ret,
980 clockid_t clock,
981 uint64_t usec,
982 uint64_t accuracy,
983 sd_event_time_handler_t callback,
984 void *userdata) {
985
986 EventSourceType type;
987 _cleanup_(source_freep) sd_event_source *s = NULL;
988 struct clock_data *d;
989 int r;
990
991 assert_return(e, -EINVAL);
992 assert_return(e = event_resolve(e), -ENOPKG);
993 assert_return(accuracy != (uint64_t) -1, -EINVAL);
994 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
995 assert_return(!event_pid_changed(e), -ECHILD);
996
997 if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
998 return -EOPNOTSUPP;
999
1000 type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
1001 if (type < 0)
1002 return -EOPNOTSUPP;
1003
1004 if (!callback)
1005 callback = time_exit_callback;
1006
1007 d = event_get_clock_data(e, type);
1008 assert(d);
1009
1010 r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
1011 if (r < 0)
1012 return r;
1013
1014 r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
1015 if (r < 0)
1016 return r;
1017
1018 if (d->fd < 0) {
1019 r = event_setup_timer_fd(e, d, clock);
1020 if (r < 0)
1021 return r;
1022 }
1023
1024 s = source_new(e, !ret, type);
1025 if (!s)
1026 return -ENOMEM;
1027
1028 s->time.next = usec;
1029 s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
1030 s->time.callback = callback;
1031 s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
1032 s->userdata = userdata;
1033 s->enabled = SD_EVENT_ONESHOT;
1034
1035 d->needs_rearm = true;
1036
1037 r = prioq_put(d->earliest, s, &s->time.earliest_index);
1038 if (r < 0)
1039 return r;
1040
1041 r = prioq_put(d->latest, s, &s->time.latest_index);
1042 if (r < 0)
1043 return r;
1044
1045 if (ret)
1046 *ret = s;
1047 TAKE_PTR(s);
1048
1049 return 0;
1050 }
1051
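/* Illustrative usage (editor's sketch, not part of the original source): a one-shot timer firing
 * roughly five seconds from now on CLOCK_MONOTONIC, with 100ms of accuracy slack so the wakeup can
 * be coalesced with others. "on_timer" and "e" are example names; now() and usec_add() are the
 * helpers from time-util.h included above.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return 0;   // one-shot: the source is disabled again after this dispatch
 *     }
 *
 *     r = sd_event_add_time(e, NULL, CLOCK_MONOTONIC,
 *                           usec_add(now(CLOCK_MONOTONIC), 5 * USEC_PER_SEC),
 *                           100 * USEC_PER_MSEC, on_timer, NULL);
 *
 * Passing a NULL callback is also allowed: it falls back to time_exit_callback() above, which
 * exits the loop with the userdata pointer as exit code. */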
1052 static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
1053 assert(s);
1054
1055 return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
1056 }
1057
1058 _public_ int sd_event_add_signal(
1059 sd_event *e,
1060 sd_event_source **ret,
1061 int sig,
1062 sd_event_signal_handler_t callback,
1063 void *userdata) {
1064
1065 _cleanup_(source_freep) sd_event_source *s = NULL;
1066 struct signal_data *d;
1067 sigset_t ss;
1068 int r;
1069
1070 assert_return(e, -EINVAL);
1071 assert_return(e = event_resolve(e), -ENOPKG);
1072 assert_return(SIGNAL_VALID(sig), -EINVAL);
1073 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1074 assert_return(!event_pid_changed(e), -ECHILD);
1075
1076 if (!callback)
1077 callback = signal_exit_callback;
1078
1079 r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
1080 if (r != 0)
1081 return -r;
1082
1083 if (!sigismember(&ss, sig))
1084 return -EBUSY;
1085
1086 if (!e->signal_sources) {
1087 e->signal_sources = new0(sd_event_source*, _NSIG);
1088 if (!e->signal_sources)
1089 return -ENOMEM;
1090 } else if (e->signal_sources[sig])
1091 return -EBUSY;
1092
1093 s = source_new(e, !ret, SOURCE_SIGNAL);
1094 if (!s)
1095 return -ENOMEM;
1096
1097 s->signal.sig = sig;
1098 s->signal.callback = callback;
1099 s->userdata = userdata;
1100 s->enabled = SD_EVENT_ON;
1101
1102 e->signal_sources[sig] = s;
1103
1104 r = event_make_signal_data(e, sig, &d);
1105 if (r < 0)
1106 return r;
1107
1108 /* Use the signal name as description for the event source by default */
1109 (void) sd_event_source_set_description(s, signal_to_string(sig));
1110
1111 if (ret)
1112 *ret = s;
1113 TAKE_PTR(s);
1114
1115 return 0;
1116 }
1117
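/* Illustrative usage (editor's sketch, not part of the original source): the check above insists
 * that the signal is already blocked for the calling thread, hence a typical caller masks it first
 * and only then adds the source. "on_sigterm" and "e" are example names; sigprocmask_many() is the
 * varargs helper from signal-util.h included above.
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGTERM, -1) >= 0);
 *     r = sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL);
 *
 * Plain pthread_sigmask()/sigprocmask() works just as well for the blocking step. */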
1118 _public_ int sd_event_add_child(
1119 sd_event *e,
1120 sd_event_source **ret,
1121 pid_t pid,
1122 int options,
1123 sd_event_child_handler_t callback,
1124 void *userdata) {
1125
1126 _cleanup_(source_freep) sd_event_source *s = NULL;
1127 int r;
1128
1129 assert_return(e, -EINVAL);
1130 assert_return(e = event_resolve(e), -ENOPKG);
1131 assert_return(pid > 1, -EINVAL);
1132 assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
1133 assert_return(options != 0, -EINVAL);
1134 assert_return(callback, -EINVAL);
1135 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1136 assert_return(!event_pid_changed(e), -ECHILD);
1137
1138 r = hashmap_ensure_allocated(&e->child_sources, NULL);
1139 if (r < 0)
1140 return r;
1141
1142 if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
1143 return -EBUSY;
1144
1145 s = source_new(e, !ret, SOURCE_CHILD);
1146 if (!s)
1147 return -ENOMEM;
1148
1149 s->child.pid = pid;
1150 s->child.options = options;
1151 s->child.callback = callback;
1152 s->userdata = userdata;
1153 s->enabled = SD_EVENT_ONESHOT;
1154
1155 r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
1156 if (r < 0)
1157 return r;
1158
1159 e->n_enabled_child_sources++;
1160
1161 r = event_make_signal_data(e, SIGCHLD, NULL);
1162 if (r < 0) {
1163 e->n_enabled_child_sources--;
1164 return r;
1165 }
1166
1167 e->need_process_child = true;
1168
1169 if (ret)
1170 *ret = s;
1171 TAKE_PTR(s);
1172
1173 return 0;
1174 }
1175
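/* Illustrative usage (editor's sketch, not part of the original source): watching a forked child
 * for exit. SIGCHLD must be blocked in all threads before the child is created, since the
 * implementation funnels it through a signalfd (see event_make_signal_data(e, SIGCHLD, NULL)
 * above). "on_child" is an example name.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_debug("child exited with status %i", si->si_status);
 *             return 0;
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *     pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);
 *     r = sd_event_add_child(e, NULL, pid, WEXITED, on_child, NULL);
 */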
1176 _public_ int sd_event_add_defer(
1177 sd_event *e,
1178 sd_event_source **ret,
1179 sd_event_handler_t callback,
1180 void *userdata) {
1181
1182 _cleanup_(source_freep) sd_event_source *s = NULL;
1183 int r;
1184
1185 assert_return(e, -EINVAL);
1186 assert_return(e = event_resolve(e), -ENOPKG);
1187 assert_return(callback, -EINVAL);
1188 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1189 assert_return(!event_pid_changed(e), -ECHILD);
1190
1191 s = source_new(e, !ret, SOURCE_DEFER);
1192 if (!s)
1193 return -ENOMEM;
1194
1195 s->defer.callback = callback;
1196 s->userdata = userdata;
1197 s->enabled = SD_EVENT_ONESHOT;
1198
1199 r = source_set_pending(s, true);
1200 if (r < 0)
1201 return r;
1202
1203 if (ret)
1204 *ret = s;
1205 TAKE_PTR(s);
1206
1207 return 0;
1208 }
1209
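/* Illustrative usage (editor's sketch, not part of the original source): defer sources start out
 * pending (see source_set_pending(s, true) above) and default to SD_EVENT_ONESHOT, so they run
 * once on the next loop iteration and stay disabled afterwards unless re-enabled. "on_idle" and
 * "e" are example names.
 *
 *     static int on_idle(sd_event_source *s, void *userdata) {
 *             // do the postponed work here
 *             return 0;
 *     }
 *
 *     r = sd_event_add_defer(e, NULL, on_idle, NULL);
 */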
1210 _public_ int sd_event_add_post(
1211 sd_event *e,
1212 sd_event_source **ret,
1213 sd_event_handler_t callback,
1214 void *userdata) {
1215
1216 _cleanup_(source_freep) sd_event_source *s = NULL;
1217 int r;
1218
1219 assert_return(e, -EINVAL);
1220 assert_return(e = event_resolve(e), -ENOPKG);
1221 assert_return(callback, -EINVAL);
1222 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1223 assert_return(!event_pid_changed(e), -ECHILD);
1224
1225 r = set_ensure_allocated(&e->post_sources, NULL);
1226 if (r < 0)
1227 return r;
1228
1229 s = source_new(e, !ret, SOURCE_POST);
1230 if (!s)
1231 return -ENOMEM;
1232
1233 s->post.callback = callback;
1234 s->userdata = userdata;
1235 s->enabled = SD_EVENT_ON;
1236
1237 r = set_put(e->post_sources, s);
1238 if (r < 0)
1239 return r;
1240
1241 if (ret)
1242 *ret = s;
1243 TAKE_PTR(s);
1244
1245 return 0;
1246 }
1247
1248 _public_ int sd_event_add_exit(
1249 sd_event *e,
1250 sd_event_source **ret,
1251 sd_event_handler_t callback,
1252 void *userdata) {
1253
1254 _cleanup_(source_freep) sd_event_source *s = NULL;
1255 int r;
1256
1257 assert_return(e, -EINVAL);
1258 assert_return(e = event_resolve(e), -ENOPKG);
1259 assert_return(callback, -EINVAL);
1260 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1261 assert_return(!event_pid_changed(e), -ECHILD);
1262
1263 r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
1264 if (r < 0)
1265 return r;
1266
1267 s = source_new(e, !ret, SOURCE_EXIT);
1268 if (!s)
1269 return -ENOMEM;
1270
1271 s->exit.callback = callback;
1272 s->userdata = userdata;
1273 s->exit.prioq_index = PRIOQ_IDX_NULL;
1274 s->enabled = SD_EVENT_ONESHOT;
1275
1276 r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
1277 if (r < 0)
1278 return r;
1279
1280 if (ret)
1281 *ret = s;
1282 TAKE_PTR(s);
1283
1284 return 0;
1285 }
1286
1287 static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
1288 assert(e);
1289
1290 if (!d)
1291 return;
1292
1293 assert(hashmap_isempty(d->inodes));
1294 assert(hashmap_isempty(d->wd));
1295
1296 if (d->buffer_filled > 0)
1297 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
1298
1299 hashmap_free(d->inodes);
1300 hashmap_free(d->wd);
1301
1302 assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);
1303
1304 if (d->fd >= 0) {
1305 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
1306 log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");
1307
1308 safe_close(d->fd);
1309 }
1310 free(d);
1311 }
1312
1313 static int event_make_inotify_data(
1314 sd_event *e,
1315 int64_t priority,
1316 struct inotify_data **ret) {
1317
1318 _cleanup_close_ int fd = -1;
1319 struct inotify_data *d;
1320 struct epoll_event ev;
1321 int r;
1322
1323 assert(e);
1324
1325 d = hashmap_get(e->inotify_data, &priority);
1326 if (d) {
1327 if (ret)
1328 *ret = d;
1329 return 0;
1330 }
1331
1332 fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
1333 if (fd < 0)
1334 return -errno;
1335
1336 fd = fd_move_above_stdio(fd);
1337
1338 r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
1339 if (r < 0)
1340 return r;
1341
1342 d = new(struct inotify_data, 1);
1343 if (!d)
1344 return -ENOMEM;
1345
1346 *d = (struct inotify_data) {
1347 .wakeup = WAKEUP_INOTIFY_DATA,
1348 .fd = TAKE_FD(fd),
1349 .priority = priority,
1350 };
1351
1352 r = hashmap_put(e->inotify_data, &d->priority, d);
1353 if (r < 0) {
1354 d->fd = safe_close(d->fd);
1355 free(d);
1356 return r;
1357 }
1358
1359 ev = (struct epoll_event) {
1360 .events = EPOLLIN,
1361 .data.ptr = d,
1362 };
1363
1364 if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
1365 r = -errno;
1366 d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
1367 * remove the fd from the epoll first, which we don't want as we couldn't
1368 * add it in the first place. */
1369 event_free_inotify_data(e, d);
1370 return r;
1371 }
1372
1373 if (ret)
1374 *ret = d;
1375
1376 return 1;
1377 }
1378
1379 static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
1380 int r;
1381
1382 assert(x);
1383 assert(y);
1384
1385 r = CMP(x->dev, y->dev);
1386 if (r != 0)
1387 return r;
1388
1389 return CMP(x->ino, y->ino);
1390 }
1391
1392 static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
1393 assert(d);
1394
1395 siphash24_compress(&d->dev, sizeof(d->dev), state);
1396 siphash24_compress(&d->ino, sizeof(d->ino), state);
1397 }
1398
1399 DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);
1400
1401 static void event_free_inode_data(
1402 sd_event *e,
1403 struct inode_data *d) {
1404
1405 assert(e);
1406
1407 if (!d)
1408 return;
1409
1410 assert(!d->event_sources);
1411
1412 if (d->fd >= 0) {
1413 LIST_REMOVE(to_close, e->inode_data_to_close, d);
1414 safe_close(d->fd);
1415 }
1416
1417 if (d->inotify_data) {
1418
1419 if (d->wd >= 0) {
1420 if (d->inotify_data->fd >= 0) {
1421 /* So here's a problem. At the time this runs the watch descriptor might already be
1422 * invalidated, because an IN_IGNORED event might be queued right the moment we enter
1423 * the syscall. Hence, whenever we get EINVAL, ignore it entirely, since it's a very
1424 * likely case to happen. */
1425
1426 if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
1427 log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
1428 }
1429
1430 assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
1431 }
1432
1433 assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
1434 }
1435
1436 free(d);
1437 }
1438
1439 static void event_gc_inode_data(
1440 sd_event *e,
1441 struct inode_data *d) {
1442
1443 struct inotify_data *inotify_data;
1444
1445 assert(e);
1446
1447 if (!d)
1448 return;
1449
1450 if (d->event_sources)
1451 return;
1452
1453 inotify_data = d->inotify_data;
1454 event_free_inode_data(e, d);
1455
1456 if (inotify_data && hashmap_isempty(inotify_data->inodes))
1457 event_free_inotify_data(e, inotify_data);
1458 }
1459
1460 static int event_make_inode_data(
1461 sd_event *e,
1462 struct inotify_data *inotify_data,
1463 dev_t dev,
1464 ino_t ino,
1465 struct inode_data **ret) {
1466
1467 struct inode_data *d, key;
1468 int r;
1469
1470 assert(e);
1471 assert(inotify_data);
1472
1473 key = (struct inode_data) {
1474 .ino = ino,
1475 .dev = dev,
1476 };
1477
1478 d = hashmap_get(inotify_data->inodes, &key);
1479 if (d) {
1480 if (ret)
1481 *ret = d;
1482
1483 return 0;
1484 }
1485
1486 r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
1487 if (r < 0)
1488 return r;
1489
1490 d = new(struct inode_data, 1);
1491 if (!d)
1492 return -ENOMEM;
1493
1494 *d = (struct inode_data) {
1495 .dev = dev,
1496 .ino = ino,
1497 .wd = -1,
1498 .fd = -1,
1499 .inotify_data = inotify_data,
1500 };
1501
1502 r = hashmap_put(inotify_data->inodes, d, d);
1503 if (r < 0) {
1504 free(d);
1505 return r;
1506 }
1507
1508 if (ret)
1509 *ret = d;
1510
1511 return 1;
1512 }
1513
1514 static uint32_t inode_data_determine_mask(struct inode_data *d) {
1515 bool excl_unlink = true;
1516 uint32_t combined = 0;
1517 sd_event_source *s;
1518
1519 assert(d);
1520
1521 /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
1522 * the IN_EXCL_UNLINK flag is ANDed instead.
1523 *
1524 * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
1525 * because we cannot change the mask anymore after the event source was created once, since the kernel has no
1526 * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
1527 * events we don't care for client-side. */
1528
1529 LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {
1530
1531 if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
1532 excl_unlink = false;
1533
1534 combined |= s->inotify.mask;
1535 }
1536
1537 return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
1538 }
1539
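/* Editor's worked example for the rule above: two sources on the same inode with masks
 * (IN_CREATE|IN_EXCL_UNLINK) and (IN_DELETE|IN_EXCL_UNLINK) combine to the kernel mask
 * IN_CREATE|IN_DELETE|IN_EXCL_UNLINK, while adding a third source without IN_EXCL_UNLINK drops
 * that flag from the combination, since it is ANDed rather than ORed. */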
1540 static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
1541 uint32_t combined_mask;
1542 int wd, r;
1543
1544 assert(d);
1545 assert(d->fd >= 0);
1546
1547 combined_mask = inode_data_determine_mask(d);
1548
1549 if (d->wd >= 0 && combined_mask == d->combined_mask)
1550 return 0;
1551
1552 r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
1553 if (r < 0)
1554 return r;
1555
1556 wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
1557 if (wd < 0)
1558 return -errno;
1559
1560 if (d->wd < 0) {
1561 r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
1562 if (r < 0) {
1563 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1564 return r;
1565 }
1566
1567 d->wd = wd;
1568
1569 } else if (d->wd != wd) {
1570
1571 log_debug("Weird, the watch descriptor we already knew for this inode changed?");
1572 (void) inotify_rm_watch(d->inotify_data->fd, wd);
1573 return -EINVAL;
1574 }
1575
1576 d->combined_mask = combined_mask;
1577 return 1;
1578 }
1579
1580 _public_ int sd_event_add_inotify(
1581 sd_event *e,
1582 sd_event_source **ret,
1583 const char *path,
1584 uint32_t mask,
1585 sd_event_inotify_handler_t callback,
1586 void *userdata) {
1587
1588 struct inotify_data *inotify_data = NULL;
1589 struct inode_data *inode_data = NULL;
1590 _cleanup_close_ int fd = -1;
1591 _cleanup_(source_freep) sd_event_source *s = NULL;
1592 struct stat st;
1593 int r;
1594
1595 assert_return(e, -EINVAL);
1596 assert_return(e = event_resolve(e), -ENOPKG);
1597 assert_return(path, -EINVAL);
1598 assert_return(callback, -EINVAL);
1599 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
1600 assert_return(!event_pid_changed(e), -ECHILD);
1601
1602 /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
1603 * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
1604 * there's no point in the caller specifying it explicitly. */
1605 if (mask & IN_MASK_ADD)
1606 return -EINVAL;
1607
1608 fd = open(path, O_PATH|O_CLOEXEC|
1609 (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
1610 (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
1611 if (fd < 0)
1612 return -errno;
1613
1614 if (fstat(fd, &st) < 0)
1615 return -errno;
1616
1617 s = source_new(e, !ret, SOURCE_INOTIFY);
1618 if (!s)
1619 return -ENOMEM;
1620
1621 s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
1622 s->inotify.mask = mask;
1623 s->inotify.callback = callback;
1624 s->userdata = userdata;
1625
1626 /* Allocate an inotify object for this priority, and an inode object within it */
1627 r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
1628 if (r < 0)
1629 return r;
1630
1631 r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
1632 if (r < 0) {
1633 event_free_inotify_data(e, inotify_data);
1634 return r;
1635 }
1636
1637 /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
1638 * the event source, until then, for which we need the original inode. */
1639 if (inode_data->fd < 0) {
1640 inode_data->fd = TAKE_FD(fd);
1641 LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
1642 }
1643
1644 /* Link our event source to the inode data object */
1645 LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
1646 s->inotify.inode_data = inode_data;
1647
1648 /* Actually realize the watch now */
1649 r = inode_data_realize_watch(e, inode_data);
1650 if (r < 0)
1651 return r;
1652
1653 (void) sd_event_source_set_description(s, path);
1654
1655 if (ret)
1656 *ret = s;
1657 TAKE_PTR(s);
1658
1659 return 0;
1660 }
1661
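/* Illustrative usage (editor's sketch, not part of the original source): watching a configuration
 * file for modifications. The path and handler name are examples only; the handler signature
 * follows sd_event_inotify_handler_t as used above.
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             if (ev->mask & (IN_CLOSE_WRITE|IN_MOVED_TO))
 *                     log_debug("configuration changed, scheduling reload");
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, NULL, "/etc/example.conf",
 *                              IN_CLOSE_WRITE|IN_MOVED_TO|IN_DONT_FOLLOW, on_inotify, NULL);
 *
 * Watches on the same inode and priority are coalesced into one watch descriptor, as implemented
 * by event_make_inotify_data()/event_make_inode_data() above. */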
1662 static sd_event_source* event_source_free(sd_event_source *s) {
1663 if (!s)
1664 return NULL;
1665
1666 /* Here's a special hack: when we are called from a
1667 * dispatch handler we won't free the event source
1668 * immediately, but we will detach the fd from the
1669 * epoll. This way it is safe for the caller to unref
1670 * the event source and immediately close the fd, but
1671 * we still retain a valid event source object after
1672 * the callback. */
1673
1674 if (s->dispatching) {
1675 if (s->type == SOURCE_IO)
1676 source_io_unregister(s);
1677
1678 source_disconnect(s);
1679 } else
1680 source_free(s);
1681
1682 return NULL;
1683 }
1684
1685 DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);
1686
1687 _public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
1688 assert_return(s, -EINVAL);
1689 assert_return(!event_pid_changed(s->event), -ECHILD);
1690
1691 return free_and_strdup(&s->description, description);
1692 }
1693
1694 _public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
1695 assert_return(s, -EINVAL);
1696 assert_return(description, -EINVAL);
1697 assert_return(!event_pid_changed(s->event), -ECHILD);
1698
1699 if (!s->description)
1700 return -ENXIO;
1701
1702 *description = s->description;
1703 return 0;
1704 }
1705
1706 _public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
1707 assert_return(s, NULL);
1708
1709 return s->event;
1710 }
1711
1712 _public_ int sd_event_source_get_pending(sd_event_source *s) {
1713 assert_return(s, -EINVAL);
1714 assert_return(s->type != SOURCE_EXIT, -EDOM);
1715 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1716 assert_return(!event_pid_changed(s->event), -ECHILD);
1717
1718 return s->pending;
1719 }
1720
1721 _public_ int sd_event_source_get_io_fd(sd_event_source *s) {
1722 assert_return(s, -EINVAL);
1723 assert_return(s->type == SOURCE_IO, -EDOM);
1724 assert_return(!event_pid_changed(s->event), -ECHILD);
1725
1726 return s->io.fd;
1727 }
1728
1729 _public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
1730 int r;
1731
1732 assert_return(s, -EINVAL);
1733 assert_return(fd >= 0, -EBADF);
1734 assert_return(s->type == SOURCE_IO, -EDOM);
1735 assert_return(!event_pid_changed(s->event), -ECHILD);
1736
1737 if (s->io.fd == fd)
1738 return 0;
1739
1740 if (s->enabled == SD_EVENT_OFF) {
1741 s->io.fd = fd;
1742 s->io.registered = false;
1743 } else {
1744 int saved_fd;
1745
1746 saved_fd = s->io.fd;
1747 assert(s->io.registered);
1748
1749 s->io.fd = fd;
1750 s->io.registered = false;
1751
1752 r = source_io_register(s, s->enabled, s->io.events);
1753 if (r < 0) {
1754 s->io.fd = saved_fd;
1755 s->io.registered = true;
1756 return r;
1757 }
1758
1759 epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
1760 }
1761
1762 return 0;
1763 }
1764
1765 _public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
1766 assert_return(s, -EINVAL);
1767 assert_return(s->type == SOURCE_IO, -EDOM);
1768
1769 return s->io.owned;
1770 }
1771
1772 _public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
1773 assert_return(s, -EINVAL);
1774 assert_return(s->type == SOURCE_IO, -EDOM);
1775
1776 s->io.owned = own;
1777 return 0;
1778 }
1779
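/* Illustrative usage (editor's sketch, not part of the original source): handing fd ownership to
 * the event source, so that source_free() above closes it when the source goes away. "s", "e",
 * "fd" and "on_io" are example names.
 *
 *     r = sd_event_add_io(e, &s, fd, EPOLLIN, on_io, NULL);
 *     if (r < 0)
 *             return r;
 *     (void) sd_event_source_set_io_fd_own(s, true);
 *     fd = -1;   // the source owns the fd now, don't close it again
 */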
1780 _public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
1781 assert_return(s, -EINVAL);
1782 assert_return(events, -EINVAL);
1783 assert_return(s->type == SOURCE_IO, -EDOM);
1784 assert_return(!event_pid_changed(s->event), -ECHILD);
1785
1786 *events = s->io.events;
1787 return 0;
1788 }
1789
1790 _public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
1791 int r;
1792
1793 assert_return(s, -EINVAL);
1794 assert_return(s->type == SOURCE_IO, -EDOM);
1795 assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
1796 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1797 assert_return(!event_pid_changed(s->event), -ECHILD);
1798
1799 /* edge-triggered updates are never skipped, so we can reset edges */
1800 if (s->io.events == events && !(events & EPOLLET))
1801 return 0;
1802
1803 r = source_set_pending(s, false);
1804 if (r < 0)
1805 return r;
1806
1807 if (s->enabled != SD_EVENT_OFF) {
1808 r = source_io_register(s, s->enabled, events);
1809 if (r < 0)
1810 return r;
1811 }
1812
1813 s->io.events = events;
1814
1815 return 0;
1816 }
1817
1818 _public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
1819 assert_return(s, -EINVAL);
1820 assert_return(revents, -EINVAL);
1821 assert_return(s->type == SOURCE_IO, -EDOM);
1822 assert_return(s->pending, -ENODATA);
1823 assert_return(!event_pid_changed(s->event), -ECHILD);
1824
1825 *revents = s->io.revents;
1826 return 0;
1827 }
1828
1829 _public_ int sd_event_source_get_signal(sd_event_source *s) {
1830 assert_return(s, -EINVAL);
1831 assert_return(s->type == SOURCE_SIGNAL, -EDOM);
1832 assert_return(!event_pid_changed(s->event), -ECHILD);
1833
1834 return s->signal.sig;
1835 }
1836
1837 _public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
1838 assert_return(s, -EINVAL);
1839 assert_return(!event_pid_changed(s->event), -ECHILD);
1840
1841 *priority = s->priority;
1842 return 0;
1843 }
1844
1845 _public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
1846 bool rm_inotify = false, rm_inode = false;
1847 struct inotify_data *new_inotify_data = NULL;
1848 struct inode_data *new_inode_data = NULL;
1849 int r;
1850
1851 assert_return(s, -EINVAL);
1852 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
1853 assert_return(!event_pid_changed(s->event), -ECHILD);
1854
1855 if (s->priority == priority)
1856 return 0;
1857
1858 if (s->type == SOURCE_INOTIFY) {
1859 struct inode_data *old_inode_data;
1860
1861 assert(s->inotify.inode_data);
1862 old_inode_data = s->inotify.inode_data;
1863
1864 /* We need the original fd to change the priority. If we don't have it we can't change the
1865 * priority anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1866 * events we allow priority changes only until the first following iteration. */
1867 if (old_inode_data->fd < 0)
1868 return -EOPNOTSUPP;
1869
1870 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1871 if (r < 0)
1872 return r;
1873 rm_inotify = r > 0;
1874
1875 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1876 if (r < 0)
1877 goto fail;
1878 rm_inode = r > 0;
1879
1880 if (new_inode_data->fd < 0) {
1881 /* Duplicate the fd for the new inode object if we don't have any yet */
1882 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1883 if (new_inode_data->fd < 0) {
1884 r = -errno;
1885 goto fail;
1886 }
1887
1888 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1889 }
1890
1891 /* Move the event source to the new inode data structure */
1892 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1893 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1894 s->inotify.inode_data = new_inode_data;
1895
1896 /* Now create the new watch */
1897 r = inode_data_realize_watch(s->event, new_inode_data);
1898 if (r < 0) {
1899 /* Move it back */
1900 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1901 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1902 s->inotify.inode_data = old_inode_data;
1903 goto fail;
1904 }
1905
1906 s->priority = priority;
1907
1908 event_gc_inode_data(s->event, old_inode_data);
1909
1910 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
1911 struct signal_data *old, *d;
1912
1913 /* Move us from the signalfd belonging to the old
1914 * priority to the signalfd of the new priority */
1915
1916 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1917
1918 s->priority = priority;
1919
1920 r = event_make_signal_data(s->event, s->signal.sig, &d);
1921 if (r < 0) {
1922 s->priority = old->priority;
1923 return r;
1924 }
1925
1926 event_unmask_signal_data(s->event, old, s->signal.sig);
1927 } else
1928 s->priority = priority;
1929
1930 if (s->pending)
1931 prioq_reshuffle(s->event->pending, s, &s->pending_index);
1932
1933 if (s->prepare)
1934 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
1935
1936 if (s->type == SOURCE_EXIT)
1937 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
1938
1939 return 0;
1940
1941 fail:
1942 if (rm_inode)
1943 event_free_inode_data(s->event, new_inode_data);
1944
1945 if (rm_inotify)
1946 event_free_inotify_data(s->event, new_inotify_data);
1947
1948 return r;
1949 }
1950
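/* Illustrative usage (editor's sketch, not part of the original source): priorities are plain
 * int64_t values and lower values dispatch first (see pending_prioq_compare() above); the
 * SD_EVENT_PRIORITY_IMPORTANT/NORMAL/IDLE constants from sd-event.h are convenient anchors.
 *
 *     r = sd_event_source_set_priority(s, SD_EVENT_PRIORITY_IDLE);
 *
 * As the code above shows, for signal and inotify sources this may have to move the source to a
 * different signalfd or inotify object, so the call can fail and should be checked. */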
1951 _public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
1952 assert_return(s, -EINVAL);
1953 assert_return(!event_pid_changed(s->event), -ECHILD);
1954
1955 if (m)
1956 *m = s->enabled;
1957 return s->enabled != SD_EVENT_OFF;
1958 }
1959
1960 _public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
1961 int r;
1962
1963 assert_return(s, -EINVAL);
1964 assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
1965 assert_return(!event_pid_changed(s->event), -ECHILD);
1966
1967 /* If we are dead anyway, we are fine with turning off
1968 * sources, but everything else needs to fail. */
1969 if (s->event->state == SD_EVENT_FINISHED)
1970 return m == SD_EVENT_OFF ? 0 : -ESTALE;
1971
1972 if (s->enabled == m)
1973 return 0;
1974
1975 if (m == SD_EVENT_OFF) {
1976
1977 /* Unset the pending flag when this event source is disabled */
1978 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
1979 r = source_set_pending(s, false);
1980 if (r < 0)
1981 return r;
1982 }
1983
1984 switch (s->type) {
1985
1986 case SOURCE_IO:
1987 source_io_unregister(s);
1988 s->enabled = m;
1989 break;
1990
1991 case SOURCE_TIME_REALTIME:
1992 case SOURCE_TIME_BOOTTIME:
1993 case SOURCE_TIME_MONOTONIC:
1994 case SOURCE_TIME_REALTIME_ALARM:
1995 case SOURCE_TIME_BOOTTIME_ALARM: {
1996 struct clock_data *d;
1997
1998 s->enabled = m;
1999 d = event_get_clock_data(s->event, s->type);
2000 assert(d);
2001
2002 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2003 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2004 d->needs_rearm = true;
2005 break;
2006 }
2007
2008 case SOURCE_SIGNAL:
2009 s->enabled = m;
2010
2011 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2012 break;
2013
2014 case SOURCE_CHILD:
2015 s->enabled = m;
2016
2017 assert(s->event->n_enabled_child_sources > 0);
2018 s->event->n_enabled_child_sources--;
2019
2020 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2021 break;
2022
2023 case SOURCE_EXIT:
2024 s->enabled = m;
2025 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2026 break;
2027
2028 case SOURCE_DEFER:
2029 case SOURCE_POST:
2030 case SOURCE_INOTIFY:
2031 s->enabled = m;
2032 break;
2033
2034 default:
2035 assert_not_reached("Wut? I shouldn't exist.");
2036 }
2037
2038 } else {
2039
2040 /* Unset the pending flag when this event source is enabled */
2041 if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2042 r = source_set_pending(s, false);
2043 if (r < 0)
2044 return r;
2045 }
2046
2047 switch (s->type) {
2048
2049 case SOURCE_IO:
2050 r = source_io_register(s, m, s->io.events);
2051 if (r < 0)
2052 return r;
2053
2054 s->enabled = m;
2055 break;
2056
2057 case SOURCE_TIME_REALTIME:
2058 case SOURCE_TIME_BOOTTIME:
2059 case SOURCE_TIME_MONOTONIC:
2060 case SOURCE_TIME_REALTIME_ALARM:
2061 case SOURCE_TIME_BOOTTIME_ALARM: {
2062 struct clock_data *d;
2063
2064 s->enabled = m;
2065 d = event_get_clock_data(s->event, s->type);
2066 assert(d);
2067
2068 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2069 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2070 d->needs_rearm = true;
2071 break;
2072 }
2073
2074 case SOURCE_SIGNAL:
2075
2076 s->enabled = m;
2077
2078 r = event_make_signal_data(s->event, s->signal.sig, NULL);
2079 if (r < 0) {
2080 s->enabled = SD_EVENT_OFF;
2081 event_gc_signal_data(s->event, &s->priority, s->signal.sig);
2082 return r;
2083 }
2084
2085 break;
2086
2087 case SOURCE_CHILD:
2088
2089 if (s->enabled == SD_EVENT_OFF)
2090 s->event->n_enabled_child_sources++;
2091
2092 s->enabled = m;
2093
2094 r = event_make_signal_data(s->event, SIGCHLD, NULL);
2095 if (r < 0) {
2096 s->enabled = SD_EVENT_OFF;
2097 s->event->n_enabled_child_sources--;
2098 event_gc_signal_data(s->event, &s->priority, SIGCHLD);
2099 return r;
2100 }
2101
2102 break;
2103
2104 case SOURCE_EXIT:
2105 s->enabled = m;
2106 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
2107 break;
2108
2109 case SOURCE_DEFER:
2110 case SOURCE_POST:
2111 case SOURCE_INOTIFY:
2112 s->enabled = m;
2113 break;
2114
2115 default:
2116 assert_not_reached("Wut? I shouldn't exist.");
2117 }
2118 }
2119
2120 if (s->pending)
2121 prioq_reshuffle(s->event->pending, s, &s->pending_index);
2122
2123 if (s->prepare)
2124 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
2125
2126 return 0;
2127 }
2128
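/* Illustrative usage (editor's sketch, not part of the original source): a oneshot source is
 * disabled again after a single dispatch, so re-arming it from its own handler is the usual way to
 * implement "run again later". "on_timer" is an example name; the pattern assumes a time source.
 *
 *     static int on_timer(sd_event_source *s, uint64_t usec, void *userdata) {
 *             (void) sd_event_source_set_time(s, usec_add(usec, 30 * USEC_PER_SEC));
 *             return sd_event_source_set_enabled(s, SD_EVENT_ONESHOT);
 *     }
 */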
2129 _public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
2130 assert_return(s, -EINVAL);
2131 assert_return(usec, -EINVAL);
2132 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2133 assert_return(!event_pid_changed(s->event), -ECHILD);
2134
2135 *usec = s->time.next;
2136 return 0;
2137 }
2138
2139 _public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
2140 struct clock_data *d;
2141 int r;
2142
2143 assert_return(s, -EINVAL);
2144 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2145 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2146 assert_return(!event_pid_changed(s->event), -ECHILD);
2147
2148 r = source_set_pending(s, false);
2149 if (r < 0)
2150 return r;
2151
2152 s->time.next = usec;
2153
2154 d = event_get_clock_data(s->event, s->type);
2155 assert(d);
2156
2157 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2158 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2159 d->needs_rearm = true;
2160
2161 return 0;
2162 }
2163
2164 _public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
2165 assert_return(s, -EINVAL);
2166 assert_return(usec, -EINVAL);
2167 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2168 assert_return(!event_pid_changed(s->event), -ECHILD);
2169
2170 *usec = s->time.accuracy;
2171 return 0;
2172 }
2173
2174 _public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
2175 struct clock_data *d;
2176 int r;
2177
2178 assert_return(s, -EINVAL);
2179 assert_return(usec != (uint64_t) -1, -EINVAL);
2180 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2181 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2182 assert_return(!event_pid_changed(s->event), -ECHILD);
2183
2184 r = source_set_pending(s, false);
2185 if (r < 0)
2186 return r;
2187
2188 if (usec == 0)
2189 usec = DEFAULT_ACCURACY_USEC;
2190
2191 s->time.accuracy = usec;
2192
2193 d = event_get_clock_data(s->event, s->type);
2194 assert(d);
2195
2196 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2197 d->needs_rearm = true;
2198
2199 return 0;
2200 }
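
/*
 * Illustrative sketch (not part of this file): a generous accuracy allows the loop to
 * coalesce this wakeup with others (see sleep_between() below); passing 0 resets the
 * accuracy to DEFAULT_ACCURACY_USEC. The source name is hypothetical.
 *
 *     // Allow a periodic housekeeping timer to fire anywhere within a one-minute window.
 *     assert_se(sd_event_source_set_time_accuracy(housekeeping_source, USEC_PER_MINUTE) >= 0);
 */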
2201
2202 _public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
2203 assert_return(s, -EINVAL);
2204 assert_return(clock, -EINVAL);
2205 assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
2206 assert_return(!event_pid_changed(s->event), -ECHILD);
2207
2208 *clock = event_source_type_to_clock(s->type);
2209 return 0;
2210 }
2211
2212 _public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
2213 assert_return(s, -EINVAL);
2214 assert_return(pid, -EINVAL);
2215 assert_return(s->type == SOURCE_CHILD, -EDOM);
2216 assert_return(!event_pid_changed(s->event), -ECHILD);
2217
2218 *pid = s->child.pid;
2219 return 0;
2220 }
2221
2222 _public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
2223 assert_return(s, -EINVAL);
2224 assert_return(mask, -EINVAL);
2225 assert_return(s->type == SOURCE_INOTIFY, -EDOM);
2226 assert_return(!event_pid_changed(s->event), -ECHILD);
2227
2228 *mask = s->inotify.mask;
2229 return 0;
2230 }
2231
2232 _public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
2233 int r;
2234
2235 assert_return(s, -EINVAL);
2236 assert_return(s->type != SOURCE_EXIT, -EDOM);
2237 assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
2238 assert_return(!event_pid_changed(s->event), -ECHILD);
2239
2240 if (s->prepare == callback)
2241 return 0;
2242
2243 if (callback && s->prepare) {
2244 s->prepare = callback;
2245 return 0;
2246 }
2247
2248 r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
2249 if (r < 0)
2250 return r;
2251
2252 s->prepare = callback;
2253
2254 if (callback) {
2255 r = prioq_put(s->event->prepare, s, &s->prepare_index);
2256 if (r < 0)
2257 return r;
2258 } else
2259 prioq_remove(s->event->prepare, s, &s->prepare_index);
2260
2261 return 0;
2262 }
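
/*
 * Illustrative sketch (not part of this file): a prepare callback runs just before the
 * loop polls, which makes it a convenient place to adjust a source lazily. The struct
 * and field names are hypothetical.
 *
 *     static int on_prepare(sd_event_source *s, void *userdata) {
 *             struct my_conn *c = userdata;
 *
 *             // Only poll for EPOLLOUT while output is actually queued.
 *             return sd_event_source_set_io_events(s, c->have_output ? EPOLLIN|EPOLLOUT : EPOLLIN);
 *     }
 *
 *     assert_se(sd_event_source_set_prepare(io_source, on_prepare) >= 0);
 */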
2263
2264 _public_ void* sd_event_source_get_userdata(sd_event_source *s) {
2265 assert_return(s, NULL);
2266
2267 return s->userdata;
2268 }
2269
2270 _public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
2271 void *ret;
2272
2273 assert_return(s, NULL);
2274
2275 ret = s->userdata;
2276 s->userdata = userdata;
2277
2278 return ret;
2279 }
2280
2281 static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
2282 usec_t c;
2283 assert(e);
2284 assert(a <= b);
2285
2286 if (a <= 0)
2287 return 0;
2288 if (a >= USEC_INFINITY)
2289 return USEC_INFINITY;
2290
2291 if (b <= a + 1)
2292 return a;
2293
2294 initialize_perturb(e);
2295
2296 /*
2297 Find a good time to wake up again between times a and b. We
2298 have two goals here:
2299
2300 a) We want to wake up as seldom as possible, hence prefer
2301 later times over earlier times.
2302
2303 b) But if we have to wake up, then let's make sure to
2304 dispatch as much as possible on the entire system.
2305
2306 We implement this by waking up everywhere at the same time
2307 within any given minute if we can, synchronised via the
2308 perturbation value determined from the boot ID. If we can't,
2309 then we try to find the same spot in every 10s, then 1s and
2310 then 250ms step. Otherwise, we pick the last possible time
2311 to wake up.
2312 */
2313
2314 c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
2315 if (c >= b) {
2316 if (_unlikely_(c < USEC_PER_MINUTE))
2317 return b;
2318
2319 c -= USEC_PER_MINUTE;
2320 }
2321
2322 if (c >= a)
2323 return c;
2324
2325 c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
2326 if (c >= b) {
2327 if (_unlikely_(c < USEC_PER_SEC*10))
2328 return b;
2329
2330 c -= USEC_PER_SEC*10;
2331 }
2332
2333 if (c >= a)
2334 return c;
2335
2336 c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
2337 if (c >= b) {
2338 if (_unlikely_(c < USEC_PER_SEC))
2339 return b;
2340
2341 c -= USEC_PER_SEC;
2342 }
2343
2344 if (c >= a)
2345 return c;
2346
2347 c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
2348 if (c >= b) {
2349 if (_unlikely_(c < USEC_PER_MSEC*250))
2350 return b;
2351
2352 c -= USEC_PER_MSEC*250;
2353 }
2354
2355 if (c >= a)
2356 return c;
2357
2358 return b;
2359 }
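
/*
 * Worked example for the coalescing above (illustrative numbers, expressed in seconds
 * for readability): with e->perturb = 17s and a window of a = 95s, b = 130s, the
 * minute-granularity candidate is 120s + 17s = 137s, which is past b, and
 * 137s - 60s = 77s falls before a, so the minute step fails. The 10s step yields
 * 130s + (17s % 10s) = 137s, again past b, and 137s - 10s = 127s lies within [a, b],
 * so 127s is returned. Every loop sharing the same boot-ID-derived perturbation picks
 * the same spot in that 10s grid and thus wakes up together.
 */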
2360
2361 static int event_arm_timer(
2362 sd_event *e,
2363 struct clock_data *d) {
2364
2365 struct itimerspec its = {};
2366 sd_event_source *a, *b;
2367 usec_t t;
2368 int r;
2369
2370 assert(e);
2371 assert(d);
2372
2373 if (!d->needs_rearm)
2374 return 0;
2375 else
2376 d->needs_rearm = false;
2377
2378 a = prioq_peek(d->earliest);
2379 if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {
2380
2381 if (d->fd < 0)
2382 return 0;
2383
2384 if (d->next == USEC_INFINITY)
2385 return 0;
2386
2387 /* disarm */
2388 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2389 if (r < 0)
2390 return -errno;
2391
2392 d->next = USEC_INFINITY;
2393 return 0;
2394 }
2395
2396 b = prioq_peek(d->latest);
2397 assert_se(b && b->enabled != SD_EVENT_OFF);
2398
2399 t = sleep_between(e, a->time.next, time_event_source_latest(b));
2400 if (d->next == t)
2401 return 0;
2402
2403 assert_se(d->fd >= 0);
2404
2405 if (t == 0) {
2406 /* We don't want to disarm here, just to mean some time looooong ago. */
2407 its.it_value.tv_sec = 0;
2408 its.it_value.tv_nsec = 1;
2409 } else
2410 timespec_store(&its.it_value, t);
2411
2412 r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
2413 if (r < 0)
2414 return -errno;
2415
2416 d->next = t;
2417 return 0;
2418 }
2419
2420 static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
2421 assert(e);
2422 assert(s);
2423 assert(s->type == SOURCE_IO);
2424
2425 /* If the event source was already pending, we just OR in the
2426 * new revents, otherwise we reset the value. The ORing is
2427 * necessary to handle EPOLLONESHOT events properly where
2428 * readability might happen independently of writability, and
2429 * we need to keep track of both */
2430
2431 if (s->pending)
2432 s->io.revents |= revents;
2433 else
2434 s->io.revents = revents;
2435
2436 return source_set_pending(s, true);
2437 }
2438
2439 static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
2440 uint64_t x;
2441 ssize_t ss;
2442
2443 assert(e);
2444 assert(fd >= 0);
2445
2446 assert_return(events == EPOLLIN, -EIO);
2447
2448 ss = read(fd, &x, sizeof(x));
2449 if (ss < 0) {
2450 if (IN_SET(errno, EAGAIN, EINTR))
2451 return 0;
2452
2453 return -errno;
2454 }
2455
2456 if (_unlikely_(ss != sizeof(x)))
2457 return -EIO;
2458
2459 if (next)
2460 *next = USEC_INFINITY;
2461
2462 return 0;
2463 }
2464
2465 static int process_timer(
2466 sd_event *e,
2467 usec_t n,
2468 struct clock_data *d) {
2469
2470 sd_event_source *s;
2471 int r;
2472
2473 assert(e);
2474 assert(d);
2475
2476 for (;;) {
2477 s = prioq_peek(d->earliest);
2478 if (!s ||
2479 s->time.next > n ||
2480 s->enabled == SD_EVENT_OFF ||
2481 s->pending)
2482 break;
2483
2484 r = source_set_pending(s, true);
2485 if (r < 0)
2486 return r;
2487
2488 prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
2489 prioq_reshuffle(d->latest, s, &s->time.latest_index);
2490 d->needs_rearm = true;
2491 }
2492
2493 return 0;
2494 }
2495
2496 static int process_child(sd_event *e) {
2497 sd_event_source *s;
2498 Iterator i;
2499 int r;
2500
2501 assert(e);
2502
2503 e->need_process_child = false;
2504
2505 /*
2506 So, this is ugly. We iteratively invoke waitid() with P_PID
2507 + WNOHANG for each PID we wait for, instead of using
2508 P_ALL. This is because we only want to get child
2509 information of very specific child processes, and not all
2510 of them. We might not have processed the SIGCHLD event of a
2511 previous invocation and we don't want to maintain an
2512 unbounded *per-child* event queue, hence we really don't
2513 want anything flushed out of the kernel's queue that we
2514 don't care about. Since this is O(n) this means that if you
2515 have a lot of processes you probably want to handle SIGCHLD
2516 yourself.
2517
2518 We do not reap the children here (we pass WNOWAIT); reaping
2519 is only done after the event source is dispatched, so that
2520 the callback still sees the process as a zombie.
2521 */
2522
2523 HASHMAP_FOREACH(s, e->child_sources, i) {
2524 assert(s->type == SOURCE_CHILD);
2525
2526 if (s->pending)
2527 continue;
2528
2529 if (s->enabled == SD_EVENT_OFF)
2530 continue;
2531
2532 zero(s->child.siginfo);
2533 r = waitid(P_PID, s->child.pid, &s->child.siginfo,
2534 WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
2535 if (r < 0)
2536 return -errno;
2537
2538 if (s->child.siginfo.si_pid != 0) {
2539 bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2540
2541 if (!zombie && (s->child.options & WEXITED)) {
2542 /* If the child isn't dead then let's
2543 * immediately remove the state change
2544 * from the queue, since there's no
2545 * benefit in leaving it queued */
2546
2547 assert(s->child.options & (WSTOPPED|WCONTINUED));
2548 waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
2549 }
2550
2551 r = source_set_pending(s, true);
2552 if (r < 0)
2553 return r;
2554 }
2555 }
2556
2557 return 0;
2558 }
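
/*
 * Illustrative sketch (not part of this file): because process_child() passes WNOWAIT,
 * the child callback still sees the process as a zombie and may inspect it; the loop
 * reaps it right after the callback returns (see source_dispatch() below). The names
 * are hypothetical, and SIGCHLD must be blocked in all threads before the source is
 * added.
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             log_info("Child %d exited with status %d", (int) si->si_pid, si->si_status);
 *             return 0;
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGCHLD, -1) >= 0);
 *     assert_se(sd_event_add_child(e, NULL, child_pid, WEXITED, on_child, NULL) >= 0);
 */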
2559
2560 static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
2561 bool read_one = false;
2562 int r;
2563
2564 assert(e);
2565 assert(d);
2566 assert_return(events == EPOLLIN, -EIO);
2567
2568 /* If there's a signal queued on this priority and SIGCHLD is
2569 on this priority too, then make sure to recheck the
2570 children we watch. This is because we only ever dequeue
2571 the first signal per priority, and if we dequeued one and
2572 SIGCHLD was enqueued later we wouldn't notice, but we
2573 might have higher-priority children we care about, hence we
2574 need to check that explicitly. */
2575
2576 if (sigismember(&d->sigset, SIGCHLD))
2577 e->need_process_child = true;
2578
2579 /* If there's already an event source pending for this
2580 * priority we don't read another */
2581 if (d->current)
2582 return 0;
2583
2584 for (;;) {
2585 struct signalfd_siginfo si;
2586 ssize_t n;
2587 sd_event_source *s = NULL;
2588
2589 n = read(d->fd, &si, sizeof(si));
2590 if (n < 0) {
2591 if (IN_SET(errno, EAGAIN, EINTR))
2592 return read_one;
2593
2594 return -errno;
2595 }
2596
2597 if (_unlikely_(n != sizeof(si)))
2598 return -EIO;
2599
2600 assert(SIGNAL_VALID(si.ssi_signo));
2601
2602 read_one = true;
2603
2604 if (e->signal_sources)
2605 s = e->signal_sources[si.ssi_signo];
2606 if (!s)
2607 continue;
2608 if (s->pending)
2609 continue;
2610
2611 s->signal.siginfo = si;
2612 d->current = s;
2613
2614 r = source_set_pending(s, true);
2615 if (r < 0)
2616 return r;
2617
2618 return 1;
2619 }
2620 }
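
/*
 * Illustrative sketch (not part of this file): signal sources are backed by signalfd,
 * hence the signal has to be blocked in every thread before sd_event_add_signal() is
 * called, otherwise the default disposition might still run. sigprocmask_many() is the
 * internal convenience wrapper from signal-util.h; the other names are hypothetical.
 *
 *     static int on_sigusr1(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             log_info("Got SIGUSR1 from PID %u", si->ssi_pid);
 *             return 0;
 *     }
 *
 *     assert_se(sigprocmask_many(SIG_BLOCK, NULL, SIGUSR1, -1) >= 0);
 *     assert_se(sd_event_add_signal(e, NULL, SIGUSR1, on_sigusr1, NULL) >= 0);
 */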
2621
2622 static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
2623 ssize_t n;
2624
2625 assert(e);
2626 assert(d);
2627
2628 assert_return(revents == EPOLLIN, -EIO);
2629
2630 /* If there's already an event source pending for this priority, don't read another */
2631 if (d->n_pending > 0)
2632 return 0;
2633
2634 /* Is the read buffer non-empty? If so, let's not read more */
2635 if (d->buffer_filled > 0)
2636 return 0;
2637
2638 n = read(d->fd, &d->buffer, sizeof(d->buffer));
2639 if (n < 0) {
2640 if (IN_SET(errno, EAGAIN, EINTR))
2641 return 0;
2642
2643 return -errno;
2644 }
2645
2646 assert(n > 0);
2647 d->buffer_filled = (size_t) n;
2648 LIST_PREPEND(buffered, e->inotify_data_buffered, d);
2649
2650 return 1;
2651 }
2652
2653 static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
2654 assert(e);
2655 assert(d);
2656 assert(sz <= d->buffer_filled);
2657
2658 if (sz == 0)
2659 return;
2660
2661 /* Move the rest of the buffer to the front, in order to get things properly aligned again */
2662 memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
2663 d->buffer_filled -= sz;
2664
2665 if (d->buffer_filled == 0)
2666 LIST_REMOVE(buffered, e->inotify_data_buffered, d);
2667 }
2668
2669 static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
2670 int r;
2671
2672 assert(e);
2673 assert(d);
2674
2675 /* If there's already an event source pending for this priority, don't read another */
2676 if (d->n_pending > 0)
2677 return 0;
2678
2679 while (d->buffer_filled > 0) {
2680 size_t sz;
2681
2682 /* Let's validate that the event structures are complete */
2683 if (d->buffer_filled < offsetof(struct inotify_event, name))
2684 return -EIO;
2685
2686 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2687 if (d->buffer_filled < sz)
2688 return -EIO;
2689
2690 if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
2691 struct inode_data *inode_data;
2692 Iterator i;
2693
2694 /* The queue overran, let's pass this event to all event sources connected to this inotify
2695 * object */
2696
2697 HASHMAP_FOREACH(inode_data, d->inodes, i) {
2698 sd_event_source *s;
2699
2700 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2701
2702 if (s->enabled == SD_EVENT_OFF)
2703 continue;
2704
2705 r = source_set_pending(s, true);
2706 if (r < 0)
2707 return r;
2708 }
2709 }
2710 } else {
2711 struct inode_data *inode_data;
2712 sd_event_source *s;
2713
2714 /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
2715 * our watch descriptor table. */
2716 if (d->buffer.ev.mask & IN_IGNORED) {
2717
2718 inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2719 if (!inode_data) {
2720 event_inotify_data_drop(e, d, sz);
2721 continue;
2722 }
2723
2724 /* The watch descriptor was removed by the kernel, let's drop it here too */
2725 inode_data->wd = -1;
2726 } else {
2727 inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
2728 if (!inode_data) {
2729 event_inotify_data_drop(e, d, sz);
2730 continue;
2731 }
2732 }
2733
2734 /* Trigger all event sources that are interested in these events. Also trigger all event
2735 * sources if IN_IGNORED or IN_UNMOUNT is set. */
2736 LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {
2737
2738 if (s->enabled == SD_EVENT_OFF)
2739 continue;
2740
2741 if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
2742 (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
2743 continue;
2744
2745 r = source_set_pending(s, true);
2746 if (r < 0)
2747 return r;
2748 }
2749 }
2750
2751 /* Something pending now? If so, let's finish, otherwise let's read more. */
2752 if (d->n_pending > 0)
2753 return 1;
2754 }
2755
2756 return 0;
2757 }
2758
2759 static int process_inotify(sd_event *e) {
2760 struct inotify_data *d;
2761 int r, done = 0;
2762
2763 assert(e);
2764
2765 LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
2766 r = event_inotify_data_process(e, d);
2767 if (r < 0)
2768 return r;
2769 if (r > 0)
2770 done++;
2771 }
2772
2773 return done;
2774 }
2775
2776 static int source_dispatch(sd_event_source *s) {
2777 EventSourceType saved_type;
2778 int r = 0;
2779
2780 assert(s);
2781 assert(s->pending || s->type == SOURCE_EXIT);
2782
2783 /* Save the event source type here, so that we still know it after the event callback, which might invalidate
2784 * the event. */
2785 saved_type = s->type;
2786
2787 if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
2788 r = source_set_pending(s, false);
2789 if (r < 0)
2790 return r;
2791 }
2792
2793 if (s->type != SOURCE_POST) {
2794 sd_event_source *z;
2795 Iterator i;
2796
2797 /* If we execute a non-post source, let's mark all
2798 * post sources as pending */
2799
2800 SET_FOREACH(z, s->event->post_sources, i) {
2801 if (z->enabled == SD_EVENT_OFF)
2802 continue;
2803
2804 r = source_set_pending(z, true);
2805 if (r < 0)
2806 return r;
2807 }
2808 }
2809
2810 if (s->enabled == SD_EVENT_ONESHOT) {
2811 r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
2812 if (r < 0)
2813 return r;
2814 }
2815
2816 s->dispatching = true;
2817
2818 switch (s->type) {
2819
2820 case SOURCE_IO:
2821 r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
2822 break;
2823
2824 case SOURCE_TIME_REALTIME:
2825 case SOURCE_TIME_BOOTTIME:
2826 case SOURCE_TIME_MONOTONIC:
2827 case SOURCE_TIME_REALTIME_ALARM:
2828 case SOURCE_TIME_BOOTTIME_ALARM:
2829 r = s->time.callback(s, s->time.next, s->userdata);
2830 break;
2831
2832 case SOURCE_SIGNAL:
2833 r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
2834 break;
2835
2836 case SOURCE_CHILD: {
2837 bool zombie;
2838
2839 zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);
2840
2841 r = s->child.callback(s, &s->child.siginfo, s->userdata);
2842
2843 /* Now, reap the PID for good. */
2844 if (zombie)
2845 (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);
2846
2847 break;
2848 }
2849
2850 case SOURCE_DEFER:
2851 r = s->defer.callback(s, s->userdata);
2852 break;
2853
2854 case SOURCE_POST:
2855 r = s->post.callback(s, s->userdata);
2856 break;
2857
2858 case SOURCE_EXIT:
2859 r = s->exit.callback(s, s->userdata);
2860 break;
2861
2862 case SOURCE_INOTIFY: {
2863 struct sd_event *e = s->event;
2864 struct inotify_data *d;
2865 size_t sz;
2866
2867 assert(s->inotify.inode_data);
2868 assert_se(d = s->inotify.inode_data->inotify_data);
2869
2870 assert(d->buffer_filled >= offsetof(struct inotify_event, name));
2871 sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
2872 assert(d->buffer_filled >= sz);
2873
2874 r = s->inotify.callback(s, &d->buffer.ev, s->userdata);
2875
2876 /* When no event is pending anymore on this inotify object, let's drop the event from the
2877 * buffer. */
2878 if (d->n_pending == 0)
2879 event_inotify_data_drop(e, d, sz);
2880
2881 break;
2882 }
2883
2884 case SOURCE_WATCHDOG:
2885 case _SOURCE_EVENT_SOURCE_TYPE_MAX:
2886 case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
2887 assert_not_reached("Wut? I shouldn't exist.");
2888 }
2889
2890 s->dispatching = false;
2891
2892 if (r < 0)
2893 log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
2894 strna(s->description), event_source_type_to_string(saved_type));
2895
2896 if (s->n_ref == 0)
2897 source_free(s);
2898 else if (r < 0)
2899 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2900
2901 return 1;
2902 }
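
/*
 * Note for callers (illustrative, not part of this file): as implemented above, a
 * handler that returns a negative value does not abort the loop; the error is logged
 * at debug level and the source is switched to SD_EVENT_OFF. A handler that wants the
 * whole loop to stop has to request that explicitly, e.g. (do_read() is hypothetical):
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             int r;
 *
 *             r = do_read(fd);
 *             if (r < 0) {
 *                     // Propagate the error as the loop's exit code instead of just
 *                     // having this one source disabled.
 *                     (void) sd_event_exit(sd_event_source_get_event(s), r);
 *                     return r;
 *             }
 *
 *             return 0;
 *     }
 */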
2903
2904 static int event_prepare(sd_event *e) {
2905 int r;
2906
2907 assert(e);
2908
2909 for (;;) {
2910 sd_event_source *s;
2911
2912 s = prioq_peek(e->prepare);
2913 if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
2914 break;
2915
2916 s->prepare_iteration = e->iteration;
2917 r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
2918 if (r < 0)
2919 return r;
2920
2921 assert(s->prepare);
2922
2923 s->dispatching = true;
2924 r = s->prepare(s, s->userdata);
2925 s->dispatching = false;
2926
2927 if (r < 0)
2928 log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
2929 strna(s->description), event_source_type_to_string(s->type));
2930
2931 if (s->n_ref == 0)
2932 source_free(s);
2933 else if (r < 0)
2934 sd_event_source_set_enabled(s, SD_EVENT_OFF);
2935 }
2936
2937 return 0;
2938 }
2939
2940 static int dispatch_exit(sd_event *e) {
2941 sd_event_source *p;
2942 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
2943 int r;
2944
2945 assert(e);
2946
2947 p = prioq_peek(e->exit);
2948 if (!p || p->enabled == SD_EVENT_OFF) {
2949 e->state = SD_EVENT_FINISHED;
2950 return 0;
2951 }
2952
2953 ref = sd_event_ref(e);
2954 e->iteration++;
2955 e->state = SD_EVENT_EXITING;
2956 r = source_dispatch(p);
2957 e->state = SD_EVENT_INITIAL;
2958 return r;
2959 }
2960
2961 static sd_event_source* event_next_pending(sd_event *e) {
2962 sd_event_source *p;
2963
2964 assert(e);
2965
2966 p = prioq_peek(e->pending);
2967 if (!p)
2968 return NULL;
2969
2970 if (p->enabled == SD_EVENT_OFF)
2971 return NULL;
2972
2973 return p;
2974 }
2975
2976 static int arm_watchdog(sd_event *e) {
2977 struct itimerspec its = {};
2978 usec_t t;
2979 int r;
2980
2981 assert(e);
2982 assert(e->watchdog_fd >= 0);
2983
2984 t = sleep_between(e,
2985 e->watchdog_last + (e->watchdog_period / 2),
2986 e->watchdog_last + (e->watchdog_period * 3 / 4));
2987
2988 timespec_store(&its.it_value, t);
2989
2990 /* Make sure we never set the watchdog to 0, which tells the
2991 * kernel to disable it. */
2992 if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
2993 its.it_value.tv_nsec = 1;
2994
2995 r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
2996 if (r < 0)
2997 return -errno;
2998
2999 return 0;
3000 }
3001
3002 static int process_watchdog(sd_event *e) {
3003 assert(e);
3004
3005 if (!e->watchdog)
3006 return 0;
3007
3008 /* Don't notify the watchdog too often */
3009 if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
3010 return 0;
3011
3012 sd_notify(false, "WATCHDOG=1");
3013 e->watchdog_last = e->timestamp.monotonic;
3014
3015 return arm_watchdog(e);
3016 }
3017
3018 static void event_close_inode_data_fds(sd_event *e) {
3019 struct inode_data *d;
3020
3021 assert(e);
3022
3023 /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
3024 * filesystems. But we can't close them right away as we need them as long as the user still wants to make
3025 * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a watch
3026 * for the inode). Hence, let's close them when entering the first iteration after they were added, as a
3027 * compromise. */
3028
3029 while ((d = e->inode_data_to_close)) {
3030 assert(d->fd >= 0);
3031 d->fd = safe_close(d->fd);
3032
3033 LIST_REMOVE(to_close, e->inode_data_to_close, d);
3034 }
3035 }
3036
3037 _public_ int sd_event_prepare(sd_event *e) {
3038 int r;
3039
3040 assert_return(e, -EINVAL);
3041 assert_return(e = event_resolve(e), -ENOPKG);
3042 assert_return(!event_pid_changed(e), -ECHILD);
3043 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3044 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3045
3046 if (e->exit_requested)
3047 goto pending;
3048
3049 e->iteration++;
3050
3051 e->state = SD_EVENT_PREPARING;
3052 r = event_prepare(e);
3053 e->state = SD_EVENT_INITIAL;
3054 if (r < 0)
3055 return r;
3056
3057 r = event_arm_timer(e, &e->realtime);
3058 if (r < 0)
3059 return r;
3060
3061 r = event_arm_timer(e, &e->boottime);
3062 if (r < 0)
3063 return r;
3064
3065 r = event_arm_timer(e, &e->monotonic);
3066 if (r < 0)
3067 return r;
3068
3069 r = event_arm_timer(e, &e->realtime_alarm);
3070 if (r < 0)
3071 return r;
3072
3073 r = event_arm_timer(e, &e->boottime_alarm);
3074 if (r < 0)
3075 return r;
3076
3077 event_close_inode_data_fds(e);
3078
3079 if (event_next_pending(e) || e->need_process_child)
3080 goto pending;
3081
3082 e->state = SD_EVENT_ARMED;
3083
3084 return 0;
3085
3086 pending:
3087 e->state = SD_EVENT_ARMED;
3088 r = sd_event_wait(e, 0);
3089 if (r == 0)
3090 e->state = SD_EVENT_ARMED;
3091
3092 return r;
3093 }
3094
3095 _public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
3096 struct epoll_event *ev_queue;
3097 unsigned ev_queue_max;
3098 int r, m, i;
3099
3100 assert_return(e, -EINVAL);
3101 assert_return(e = event_resolve(e), -ENOPKG);
3102 assert_return(!event_pid_changed(e), -ECHILD);
3103 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3104 assert_return(e->state == SD_EVENT_ARMED, -EBUSY);
3105
3106 if (e->exit_requested) {
3107 e->state = SD_EVENT_PENDING;
3108 return 1;
3109 }
3110
3111 ev_queue_max = MAX(e->n_sources, 1u);
3112 ev_queue = newa(struct epoll_event, ev_queue_max);
3113
3114 /* If we still have inotify data buffered, then query the other fds, but don't block waiting for them */
3115 if (e->inotify_data_buffered)
3116 timeout = 0;
3117
3118 m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
3119 timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
3120 if (m < 0) {
3121 if (errno == EINTR) {
3122 e->state = SD_EVENT_PENDING;
3123 return 1;
3124 }
3125
3126 r = -errno;
3127 goto finish;
3128 }
3129
3130 triple_timestamp_get(&e->timestamp);
3131
3132 for (i = 0; i < m; i++) {
3133
3134 if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
3135 r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
3136 else {
3137 WakeupType *t = ev_queue[i].data.ptr;
3138
3139 switch (*t) {
3140
3141 case WAKEUP_EVENT_SOURCE:
3142 r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
3143 break;
3144
3145 case WAKEUP_CLOCK_DATA: {
3146 struct clock_data *d = ev_queue[i].data.ptr;
3147 r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
3148 break;
3149 }
3150
3151 case WAKEUP_SIGNAL_DATA:
3152 r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
3153 break;
3154
3155 case WAKEUP_INOTIFY_DATA:
3156 r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
3157 break;
3158
3159 default:
3160 assert_not_reached("Invalid wake-up pointer");
3161 }
3162 }
3163 if (r < 0)
3164 goto finish;
3165 }
3166
3167 r = process_watchdog(e);
3168 if (r < 0)
3169 goto finish;
3170
3171 r = process_timer(e, e->timestamp.realtime, &e->realtime);
3172 if (r < 0)
3173 goto finish;
3174
3175 r = process_timer(e, e->timestamp.boottime, &e->boottime);
3176 if (r < 0)
3177 goto finish;
3178
3179 r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
3180 if (r < 0)
3181 goto finish;
3182
3183 r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
3184 if (r < 0)
3185 goto finish;
3186
3187 r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
3188 if (r < 0)
3189 goto finish;
3190
3191 if (e->need_process_child) {
3192 r = process_child(e);
3193 if (r < 0)
3194 goto finish;
3195 }
3196
3197 r = process_inotify(e);
3198 if (r < 0)
3199 goto finish;
3200
3201 if (event_next_pending(e)) {
3202 e->state = SD_EVENT_PENDING;
3203
3204 return 1;
3205 }
3206
3207 r = 0;
3208
3209 finish:
3210 e->state = SD_EVENT_INITIAL;
3211
3212 return r;
3213 }
3214
3215 _public_ int sd_event_dispatch(sd_event *e) {
3216 sd_event_source *p;
3217 int r;
3218
3219 assert_return(e, -EINVAL);
3220 assert_return(e = event_resolve(e), -ENOPKG);
3221 assert_return(!event_pid_changed(e), -ECHILD);
3222 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3223 assert_return(e->state == SD_EVENT_PENDING, -EBUSY);
3224
3225 if (e->exit_requested)
3226 return dispatch_exit(e);
3227
3228 p = event_next_pending(e);
3229 if (p) {
3230 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3231
3232 ref = sd_event_ref(e);
3233 e->state = SD_EVENT_RUNNING;
3234 r = source_dispatch(p);
3235 e->state = SD_EVENT_INITIAL;
3236 return r;
3237 }
3238
3239 e->state = SD_EVENT_INITIAL;
3240
3241 return 1;
3242 }
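
/*
 * Illustrative sketch (not part of this file): sd_event_prepare(), sd_event_wait() and
 * sd_event_dispatch() are the building blocks that sd_event_run() below wires
 * together; calling them separately is mostly useful when embedding the loop into a
 * foreign poll loop via sd_event_get_fd(). One manual iteration looks roughly like:
 *
 *     static int run_one(sd_event *e, uint64_t timeout) {
 *             int r;
 *
 *             r = sd_event_prepare(e);
 *             if (r == 0)
 *                     // Nothing pending yet, poll for new events.
 *                     r = sd_event_wait(e, timeout);
 *             if (r > 0)
 *                     // Something is pending, dispatch exactly one event source.
 *                     r = sd_event_dispatch(e);
 *
 *             return r;
 *     }
 */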
3243
3244 static void event_log_delays(sd_event *e) {
3245 char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
3246 unsigned i;
3247 int o;
3248
3249 for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
3250 o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
3251 e->delays[i] = 0;
3252 }
3253 log_debug("Event loop iterations: %.*s", o, b);
3254 }
3255
3256 _public_ int sd_event_run(sd_event *e, uint64_t timeout) {
3257 int r;
3258
3259 assert_return(e, -EINVAL);
3260 assert_return(e = event_resolve(e), -ENOPKG);
3261 assert_return(!event_pid_changed(e), -ECHILD);
3262 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3263 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3264
3265 if (e->profile_delays && e->last_run) {
3266 usec_t this_run;
3267 unsigned l;
3268
3269 this_run = now(CLOCK_MONOTONIC);
3270
3271 l = u64log2(this_run - e->last_run);
3272 assert(l < ELEMENTSOF(e->delays));
3273 e->delays[l]++;
3274
3275 if (this_run - e->last_log >= 5*USEC_PER_SEC) {
3276 event_log_delays(e);
3277 e->last_log = this_run;
3278 }
3279 }
3280
3281 r = sd_event_prepare(e);
3282 if (r == 0)
3283 /* There was nothing? Then wait... */
3284 r = sd_event_wait(e, timeout);
3285
3286 if (e->profile_delays)
3287 e->last_run = now(CLOCK_MONOTONIC);
3288
3289 if (r > 0) {
3290 /* There's something now, so let's dispatch it */
3291 r = sd_event_dispatch(e);
3292 if (r < 0)
3293 return r;
3294
3295 return 1;
3296 }
3297
3298 return r;
3299 }
3300
3301 _public_ int sd_event_loop(sd_event *e) {
3302 _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
3303 int r;
3304
3305 assert_return(e, -EINVAL);
3306 assert_return(e = event_resolve(e), -ENOPKG);
3307 assert_return(!event_pid_changed(e), -ECHILD);
3308 assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);
3309
3310 ref = sd_event_ref(e);
3311
3312 while (e->state != SD_EVENT_FINISHED) {
3313 r = sd_event_run(e, (uint64_t) -1);
3314 if (r < 0)
3315 return r;
3316 }
3317
3318 return e->exit_code;
3319 }
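
/*
 * Illustrative sketch (not part of this file): a complete, minimal consumer of this
 * API. Error handling is abbreviated, and everything except the sd-event calls is a
 * hypothetical example.
 *
 *     #include <signal.h>
 *     #include <stdlib.h>
 *     #include <systemd/sd-event.h>
 *
 *     static int on_sigterm(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
 *             // Leave the loop with exit code 0; sd_event_loop() returns it in main() below.
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             sigset_t ss;
 *             int r;
 *
 *             if (sd_event_default(&e) < 0)
 *                     return EXIT_FAILURE;
 *
 *             sigemptyset(&ss);
 *             sigaddset(&ss, SIGTERM);
 *             sigprocmask(SIG_BLOCK, &ss, NULL);
 *
 *             if (sd_event_add_signal(e, NULL, SIGTERM, on_sigterm, NULL) < 0) {
 *                     sd_event_unref(e);
 *                     return EXIT_FAILURE;
 *             }
 *
 *             r = sd_event_loop(e);
 *             sd_event_unref(e);
 *             return r < 0 ? EXIT_FAILURE : EXIT_SUCCESS;
 *     }
 */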
3320
3321 _public_ int sd_event_get_fd(sd_event *e) {
3322
3323 assert_return(e, -EINVAL);
3324 assert_return(e = event_resolve(e), -ENOPKG);
3325 assert_return(!event_pid_changed(e), -ECHILD);
3326
3327 return e->epoll_fd;
3328 }
3329
3330 _public_ int sd_event_get_state(sd_event *e) {
3331 assert_return(e, -EINVAL);
3332 assert_return(e = event_resolve(e), -ENOPKG);
3333 assert_return(!event_pid_changed(e), -ECHILD);
3334
3335 return e->state;
3336 }
3337
3338 _public_ int sd_event_get_exit_code(sd_event *e, int *code) {
3339 assert_return(e, -EINVAL);
3340 assert_return(e = event_resolve(e), -ENOPKG);
3341 assert_return(code, -EINVAL);
3342 assert_return(!event_pid_changed(e), -ECHILD);
3343
3344 if (!e->exit_requested)
3345 return -ENODATA;
3346
3347 *code = e->exit_code;
3348 return 0;
3349 }
3350
3351 _public_ int sd_event_exit(sd_event *e, int code) {
3352 assert_return(e, -EINVAL);
3353 assert_return(e = event_resolve(e), -ENOPKG);
3354 assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
3355 assert_return(!event_pid_changed(e), -ECHILD);
3356
3357 e->exit_requested = true;
3358 e->exit_code = code;
3359
3360 return 0;
3361 }
3362
3363 _public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
3364 assert_return(e, -EINVAL);
3365 assert_return(e = event_resolve(e), -ENOPKG);
3366 assert_return(usec, -EINVAL);
3367 assert_return(!event_pid_changed(e), -ECHILD);
3368
3369 if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
3370 return -EOPNOTSUPP;
3371
3372 /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported() here,
3373 * for a reason: there are systems where CLOCK_BOOTTIME is supported but CLOCK_BOOTTIME_ALARM is not, yet for
3374 * the purpose of getting the time this doesn't matter. */
3375 if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
3376 return -EOPNOTSUPP;
3377
3378 if (!triple_timestamp_is_set(&e->timestamp)) {
3379 /* Implicitly fall back to now() if we never ran
3380 * before and thus have no cached time. */
3381 *usec = now(clock);
3382 return 1;
3383 }
3384
3385 *usec = triple_timestamp_by_clock(&e->timestamp, clock);
3386 return 0;
3387 }
3388
3389 _public_ int sd_event_default(sd_event **ret) {
3390 sd_event *e = NULL;
3391 int r;
3392
3393 if (!ret)
3394 return !!default_event;
3395
3396 if (default_event) {
3397 *ret = sd_event_ref(default_event);
3398 return 0;
3399 }
3400
3401 r = sd_event_new(&e);
3402 if (r < 0)
3403 return r;
3404
3405 e->default_event_ptr = &default_event;
3406 e->tid = gettid();
3407 default_event = e;
3408
3409 *ret = e;
3410 return 1;
3411 }
3412
3413 _public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
3414 assert_return(e, -EINVAL);
3415 assert_return(e = event_resolve(e), -ENOPKG);
3416 assert_return(tid, -EINVAL);
3417 assert_return(!event_pid_changed(e), -ECHILD);
3418
3419 if (e->tid != 0) {
3420 *tid = e->tid;
3421 return 0;
3422 }
3423
3424 return -ENXIO;
3425 }
3426
3427 _public_ int sd_event_set_watchdog(sd_event *e, int b) {
3428 int r;
3429
3430 assert_return(e, -EINVAL);
3431 assert_return(e = event_resolve(e), -ENOPKG);
3432 assert_return(!event_pid_changed(e), -ECHILD);
3433
3434 if (e->watchdog == !!b)
3435 return e->watchdog;
3436
3437 if (b) {
3438 struct epoll_event ev;
3439
3440 r = sd_watchdog_enabled(false, &e->watchdog_period);
3441 if (r <= 0)
3442 return r;
3443
3444 /* Issue first ping immediately */
3445 sd_notify(false, "WATCHDOG=1");
3446 e->watchdog_last = now(CLOCK_MONOTONIC);
3447
3448 e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
3449 if (e->watchdog_fd < 0)
3450 return -errno;
3451
3452 r = arm_watchdog(e);
3453 if (r < 0)
3454 goto fail;
3455
3456 ev = (struct epoll_event) {
3457 .events = EPOLLIN,
3458 .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
3459 };
3460
3461 r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
3462 if (r < 0) {
3463 r = -errno;
3464 goto fail;
3465 }
3466
3467 } else {
3468 if (e->watchdog_fd >= 0) {
3469 epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
3470 e->watchdog_fd = safe_close(e->watchdog_fd);
3471 }
3472 }
3473
3474 e->watchdog = !!b;
3475 return e->watchdog;
3476
3477 fail:
3478 e->watchdog_fd = safe_close(e->watchdog_fd);
3479 return r;
3480 }
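
/*
 * Illustrative sketch (not part of this file): with watchdog handling enabled the loop
 * pings the service manager on its own, so a service that sets WatchdogSec= in its
 * unit file only needs one call; the return value tells whether watchdog support was
 * actually requested via $WATCHDOG_USEC.
 *
 *     r = sd_event_set_watchdog(e, true);
 *     if (r < 0)
 *             return log_error_errno(r, "Failed to enable watchdog handling: %m");
 *     log_debug("Watchdog handling %s.", r > 0 ? "enabled" : "not requested");
 */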
3481
3482 _public_ int sd_event_get_watchdog(sd_event *e) {
3483 assert_return(e, -EINVAL);
3484 assert_return(e = event_resolve(e), -ENOPKG);
3485 assert_return(!event_pid_changed(e), -ECHILD);
3486
3487 return e->watchdog;
3488 }
3489
3490 _public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
3491 assert_return(e, -EINVAL);
3492 assert_return(e = event_resolve(e), -ENOPKG);
3493 assert_return(!event_pid_changed(e), -ECHILD);
3494
3495 *ret = e->iteration;
3496 return 0;
3497 }
3498
3499 _public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
3500 assert_return(s, -EINVAL);
3501
3502 s->destroy_callback = callback;
3503 return 0;
3504 }
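
/*
 * Illustrative sketch (not part of this file): a destroy callback ties the lifetime of
 * the userdata object to the event source, which is particularly handy for sources the
 * caller never unrefs itself. request_new(), request_free_void() (which must have the
 * sd_event_destroy_t signature) and on_request() are hypothetical.
 *
 *     struct request *req = request_new();
 *     sd_event_source *s;
 *
 *     assert_se(sd_event_add_defer(e, &s, on_request, req) >= 0);
 *     assert_se(sd_event_source_set_destroy_callback(s, request_free_void) >= 0);
 */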
3505
3506 _public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
3507 assert_return(s, -EINVAL);
3508
3509 if (ret)
3510 *ret = s->destroy_callback;
3511
3512 return !!s->destroy_callback;
3513 }
3514
3515 _public_ int sd_event_source_get_floating(sd_event_source *s) {
3516 assert_return(s, -EINVAL);
3517
3518 return s->floating;
3519 }
3520
3521 _public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
3522 assert_return(s, -EINVAL);
3523
3524 if (s->floating == !!b)
3525 return 0;
3526
3527 if (!s->event) /* Already disconnected */
3528 return -ESTALE;
3529
3530 s->floating = b;
3531
3532 if (b) {
3533 sd_event_source_ref(s);
3534 sd_event_unref(s->event);
3535 } else {
3536 sd_event_ref(s->event);
3537 sd_event_source_unref(s);
3538 }
3539
3540 return 1;
3541 }
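
/*
 * Illustrative sketch (not part of this file): a floating event source is owned by the
 * event loop rather than by the caller, i.e. it stays around until the loop itself is
 * freed even though the caller dropped its own reference. Passing NULL as the source
 * return parameter of the sd_event_add_*() calls has the same effect. Names below are
 * hypothetical.
 *
 *     sd_event_source *s;
 *
 *     assert_se(sd_event_add_defer(e, &s, on_deferred_cleanup, NULL) >= 0);
 *     assert_se(sd_event_source_set_floating(s, true) >= 0);
 *     s = sd_event_source_unref(s);
 *     // The source remains valid and will still be dispatched; the loop frees it when
 *     // the loop itself is freed.
 */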