/* SPDX-License-Identifier: LGPL-2.1+ */

#include <sys/epoll.h>
#include <sys/timerfd.h>
#include <sys/wait.h>

#include "sd-daemon.h"
#include "sd-event.h"
#include "sd-id128.h"

#include "alloc-util.h"
#include "event-source.h"
#include "fd-util.h"
#include "fs-util.h"
#include "hashmap.h"
#include "list.h"
#include "macro.h"
#include "memory-util.h"
#include "missing.h"
#include "prioq.h"
#include "process-util.h"
#include "set.h"
#include "signal-util.h"
#include "string-table.h"
#include "string-util.h"
#include "time-util.h"

#define DEFAULT_ACCURACY_USEC (250 * USEC_PER_MSEC)

static const char* const event_source_type_table[_SOURCE_EVENT_SOURCE_TYPE_MAX] = {
        [SOURCE_IO] = "io",
        [SOURCE_TIME_REALTIME] = "realtime",
        [SOURCE_TIME_BOOTTIME] = "boottime",
        [SOURCE_TIME_MONOTONIC] = "monotonic",
        [SOURCE_TIME_REALTIME_ALARM] = "realtime-alarm",
        [SOURCE_TIME_BOOTTIME_ALARM] = "boottime-alarm",
        [SOURCE_SIGNAL] = "signal",
        [SOURCE_CHILD] = "child",
        [SOURCE_DEFER] = "defer",
        [SOURCE_POST] = "post",
        [SOURCE_EXIT] = "exit",
        [SOURCE_WATCHDOG] = "watchdog",
        [SOURCE_INOTIFY] = "inotify",
};

DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(event_source_type, int);

#define EVENT_SOURCE_IS_TIME(t) IN_SET((t), SOURCE_TIME_REALTIME, SOURCE_TIME_BOOTTIME, SOURCE_TIME_MONOTONIC, SOURCE_TIME_REALTIME_ALARM, SOURCE_TIME_BOOTTIME_ALARM)

struct sd_event {
        unsigned n_ref;

        int epoll_fd;
        int watchdog_fd;

        Prioq *pending;
        Prioq *prepare;

        /* timerfd_create() only supports these five clocks so far. We
         * can add support for more clocks when the kernel learns to
         * deal with them, too. */
        struct clock_data realtime;
        struct clock_data boottime;
        struct clock_data monotonic;
        struct clock_data realtime_alarm;
        struct clock_data boottime_alarm;

        usec_t perturb;

        sd_event_source **signal_sources; /* indexed by signal number */
        Hashmap *signal_data; /* indexed by priority */

        Hashmap *child_sources;
        unsigned n_enabled_child_sources;

        Set *post_sources;

        Prioq *exit;

        Hashmap *inotify_data; /* indexed by priority */

        /* A list of inode structures that still have an fd open, that we need to close before the next loop iteration */
        LIST_HEAD(struct inode_data, inode_data_to_close);

        /* A list of inotify objects that already have events buffered which aren't processed yet */
        LIST_HEAD(struct inotify_data, inotify_data_buffered);

        pid_t original_pid;

        uint64_t iteration;
        triple_timestamp timestamp;
        int state;

        bool exit_requested:1;
        bool need_process_child:1;
        bool watchdog:1;
        bool profile_delays:1;

        int exit_code;

        pid_t tid;
        sd_event **default_event_ptr;

        usec_t watchdog_last, watchdog_period;

        unsigned n_sources;

        LIST_HEAD(sd_event_source, sources);

        usec_t last_run, last_log;
        unsigned delays[sizeof(usec_t) * 8];
};
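
/* Illustrative usage sketch (not part of the upstream file): the sd_event
 * object above is only ever handed out as an opaque pointer. A minimal
 * consumer looks roughly like this; the handler name is our own invention
 * and error handling is abbreviated:
 *
 *     #include <systemd/sd-event.h>
 *
 *     static int on_defer(sd_event_source *s, void *userdata) {
 *             return sd_event_exit(sd_event_source_get_event(s), 0);
 *     }
 *
 *     int main(void) {
 *             sd_event *e = NULL;
 *             sd_event_source *src = NULL;
 *
 *             sd_event_new(&e);                            // n_ref = 1, epoll fd set up
 *             sd_event_add_defer(e, &src, on_defer, NULL); // queue a one-shot callback
 *             sd_event_loop(e);                            // dispatches on_defer, then exits
 *
 *             sd_event_source_unref(src);
 *             sd_event_unref(e);
 *             return 0;
 *     }
 */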

static thread_local sd_event *default_event = NULL;

static void source_disconnect(sd_event_source *s);
static void event_gc_inode_data(sd_event *e, struct inode_data *d);

static sd_event *event_resolve(sd_event *e) {
        return e == SD_EVENT_DEFAULT ? default_event : e;
}

static int pending_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->pending);
        assert(y->pending);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        r = CMP(x->priority, y->priority);
        if (r != 0)
                return r;

        /* Older entries first */
        return CMP(x->pending_iteration, y->pending_iteration);
}

static int prepare_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;
        int r;

        assert(x->prepare);
        assert(y->prepare);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move most recently prepared ones last, so that we can stop
         * preparing as soon as we hit one that has already been
         * prepared in the current iteration */
        r = CMP(x->prepare_iteration, y->prepare_iteration);
        if (r != 0)
                return r;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static int earliest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(x->time.next, y->time.next);
}

static usec_t time_event_source_latest(const sd_event_source *s) {
        return usec_add(s->time.next, s->time.accuracy);
}

static int latest_time_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(EVENT_SOURCE_IS_TIME(x->type));
        assert(x->type == y->type);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Move the pending ones to the end */
        if (!x->pending && y->pending)
                return -1;
        if (x->pending && !y->pending)
                return 1;

        /* Order by time */
        return CMP(time_event_source_latest(x), time_event_source_latest(y));
}

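/* Worked example (illustrative): the two prioqs bracket each timer by the
 * interval [next, next + accuracy]. Say source A has next=100ms with
 * accuracy=250ms, and source B has next=300ms with accuracy=50ms: "earliest"
 * orders A before B (100 < 300), while both share the same latest time
 * (350ms), so CMP() returns 0 there. Since the intervals overlap in
 * [300ms, 350ms], a single timerfd wakeup in that window can dispatch both
 * sources at once, which is the point of tracking both bounds. */
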
static int exit_prioq_compare(const void *a, const void *b) {
        const sd_event_source *x = a, *y = b;

        assert(x->type == SOURCE_EXIT);
        assert(y->type == SOURCE_EXIT);

        /* Enabled ones first */
        if (x->enabled != SD_EVENT_OFF && y->enabled == SD_EVENT_OFF)
                return -1;
        if (x->enabled == SD_EVENT_OFF && y->enabled != SD_EVENT_OFF)
                return 1;

        /* Lower priority values first */
        return CMP(x->priority, y->priority);
}

static void free_clock_data(struct clock_data *d) {
        assert(d);
        assert(d->wakeup == WAKEUP_CLOCK_DATA);

        safe_close(d->fd);
        prioq_free(d->earliest);
        prioq_free(d->latest);
}

static sd_event *event_free(sd_event *e) {
        sd_event_source *s;

        assert(e);

        while ((s = e->sources)) {
                assert(s->floating);
                source_disconnect(s);
                sd_event_source_unref(s);
        }

        assert(e->n_sources == 0);

        if (e->default_event_ptr)
                *(e->default_event_ptr) = NULL;

        safe_close(e->epoll_fd);
        safe_close(e->watchdog_fd);

        free_clock_data(&e->realtime);
        free_clock_data(&e->boottime);
        free_clock_data(&e->monotonic);
        free_clock_data(&e->realtime_alarm);
        free_clock_data(&e->boottime_alarm);

        prioq_free(e->pending);
        prioq_free(e->prepare);
        prioq_free(e->exit);

        free(e->signal_sources);
        hashmap_free(e->signal_data);

        hashmap_free(e->inotify_data);

        hashmap_free(e->child_sources);
        set_free(e->post_sources);

        return mfree(e);
}

_public_ int sd_event_new(sd_event** ret) {
        sd_event *e;
        int r;

        assert_return(ret, -EINVAL);

        e = new(sd_event, 1);
        if (!e)
                return -ENOMEM;

        *e = (sd_event) {
                .n_ref = 1,
                .epoll_fd = -1,
                .watchdog_fd = -1,
                .realtime.wakeup = WAKEUP_CLOCK_DATA,
                .realtime.fd = -1,
                .realtime.next = USEC_INFINITY,
                .boottime.wakeup = WAKEUP_CLOCK_DATA,
                .boottime.fd = -1,
                .boottime.next = USEC_INFINITY,
                .monotonic.wakeup = WAKEUP_CLOCK_DATA,
                .monotonic.fd = -1,
                .monotonic.next = USEC_INFINITY,
                .realtime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .realtime_alarm.fd = -1,
                .realtime_alarm.next = USEC_INFINITY,
                .boottime_alarm.wakeup = WAKEUP_CLOCK_DATA,
                .boottime_alarm.fd = -1,
                .boottime_alarm.next = USEC_INFINITY,
                .perturb = USEC_INFINITY,
                .original_pid = getpid_cached(),
        };

        r = prioq_ensure_allocated(&e->pending, pending_prioq_compare);
        if (r < 0)
                goto fail;

        e->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        if (e->epoll_fd < 0) {
                r = -errno;
                goto fail;
        }

        e->epoll_fd = fd_move_above_stdio(e->epoll_fd);

        if (secure_getenv("SD_EVENT_PROFILE_DELAYS")) {
                log_debug("Event loop profiling enabled. Logarithmic histogram of event loop iterations in the range 2^0 ... 2^63 us will be logged every 5s.");
                e->profile_delays = true;
        }

        *ret = e;
        return 0;

fail:
        event_free(e);
        return r;
}
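
/* Illustrative usage sketch (not upstream code): allocating and releasing a
 * loop explicitly instead of using sd_event_default(). Exporting the (real)
 * SD_EVENT_PROFILE_DELAYS environment variable before this call enables the
 * latency histogram mentioned above:
 *
 *     sd_event *e = NULL;
 *     int r;
 *
 *     r = sd_event_new(&e);
 *     if (r < 0)
 *             return r;              // -ENOMEM, or -errno from epoll_create1()
 *     ...
 *     e = sd_event_unref(e);         // drops the reference, frees the loop
 */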

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event, sd_event, event_free);

static bool event_pid_changed(sd_event *e) {
        assert(e);

        /* We don't support people creating an event loop and keeping
         * it around over a fork(). Let's complain. */

        return e->original_pid != getpid_cached();
}

static void source_io_unregister(sd_event_source *s) {
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);

        if (event_pid_changed(s->event))
                return;

        if (!s->io.registered)
                return;

        r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, s->io.fd, NULL);
        if (r < 0)
                log_debug_errno(errno, "Failed to remove source %s (type %s) from epoll: %m",
                                strna(s->description), event_source_type_to_string(s->type));

        s->io.registered = false;
}

static int source_io_register(
                sd_event_source *s,
                int enabled,
                uint32_t events) {

        struct epoll_event ev;
        int r;

        assert(s);
        assert(s->type == SOURCE_IO);
        assert(enabled != SD_EVENT_OFF);

        ev = (struct epoll_event) {
                .events = events | (enabled == SD_EVENT_ONESHOT ? EPOLLONESHOT : 0),
                .data.ptr = s,
        };

        if (s->io.registered)
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_MOD, s->io.fd, &ev);
        else
                r = epoll_ctl(s->event->epoll_fd, EPOLL_CTL_ADD, s->io.fd, &ev);
        if (r < 0)
                return -errno;

        s->io.registered = true;

        return 0;
}

static clockid_t event_source_type_to_clock(EventSourceType t) {

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return CLOCK_REALTIME;

        case SOURCE_TIME_BOOTTIME:
                return CLOCK_BOOTTIME;

        case SOURCE_TIME_MONOTONIC:
                return CLOCK_MONOTONIC;

        case SOURCE_TIME_REALTIME_ALARM:
                return CLOCK_REALTIME_ALARM;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return CLOCK_BOOTTIME_ALARM;

        default:
                return (clockid_t) -1;
        }
}

static EventSourceType clock_to_event_source_type(clockid_t clock) {

        switch (clock) {

        case CLOCK_REALTIME:
                return SOURCE_TIME_REALTIME;

        case CLOCK_BOOTTIME:
                return SOURCE_TIME_BOOTTIME;

        case CLOCK_MONOTONIC:
                return SOURCE_TIME_MONOTONIC;

        case CLOCK_REALTIME_ALARM:
                return SOURCE_TIME_REALTIME_ALARM;

        case CLOCK_BOOTTIME_ALARM:
                return SOURCE_TIME_BOOTTIME_ALARM;

        default:
                return _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        }
}

static struct clock_data* event_get_clock_data(sd_event *e, EventSourceType t) {
        assert(e);

        switch (t) {

        case SOURCE_TIME_REALTIME:
                return &e->realtime;

        case SOURCE_TIME_BOOTTIME:
                return &e->boottime;

        case SOURCE_TIME_MONOTONIC:
                return &e->monotonic;

        case SOURCE_TIME_REALTIME_ALARM:
                return &e->realtime_alarm;

        case SOURCE_TIME_BOOTTIME_ALARM:
                return &e->boottime_alarm;

        default:
                return NULL;
        }
}

static void event_free_signal_data(sd_event *e, struct signal_data *d) {
        assert(e);

        if (!d)
                return;

        hashmap_remove(e->signal_data, &d->priority);
        safe_close(d->fd);
        free(d);
}

static int event_make_signal_data(
                sd_event *e,
                int sig,
                struct signal_data **ret) {

        struct epoll_event ev;
        struct signal_data *d;
        bool added = false;
        sigset_t ss_copy;
        int64_t priority;
        int r;

        assert(e);

        if (event_pid_changed(e))
                return -ECHILD;

        if (e->signal_sources && e->signal_sources[sig])
                priority = e->signal_sources[sig]->priority;
        else
                priority = SD_EVENT_PRIORITY_NORMAL;

        d = hashmap_get(e->signal_data, &priority);
        if (d) {
                if (sigismember(&d->sigset, sig) > 0) {
                        if (ret)
                                *ret = d;
                        return 0;
                }
        } else {
                r = hashmap_ensure_allocated(&e->signal_data, &uint64_hash_ops);
                if (r < 0)
                        return r;

                d = new(struct signal_data, 1);
                if (!d)
                        return -ENOMEM;

                *d = (struct signal_data) {
                        .wakeup = WAKEUP_SIGNAL_DATA,
                        .fd = -1,
                        .priority = priority,
                };

                r = hashmap_put(e->signal_data, &d->priority, d);
                if (r < 0) {
                        free(d);
                        return r;
                }

                added = true;
        }

        ss_copy = d->sigset;
        assert_se(sigaddset(&ss_copy, sig) >= 0);

        r = signalfd(d->fd, &ss_copy, SFD_NONBLOCK|SFD_CLOEXEC);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        d->sigset = ss_copy;

        if (d->fd >= 0) {
                if (ret)
                        *ret = d;
                return 0;
        }

        d->fd = fd_move_above_stdio(r);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev);
        if (r < 0) {
                r = -errno;
                goto fail;
        }

        if (ret)
                *ret = d;

        return 0;

fail:
        if (added)
                event_free_signal_data(e, d);

        return r;
}

static void event_unmask_signal_data(sd_event *e, struct signal_data *d, int sig) {
        assert(e);
        assert(d);

        /* Turns off the specified signal in the signal data
         * object. If the signal mask of the object becomes empty
         * that way, the object is removed as well. */

        if (sigismember(&d->sigset, sig) == 0)
                return;

        assert_se(sigdelset(&d->sigset, sig) >= 0);

        if (sigisemptyset(&d->sigset)) {
                /* If the mask is all-zero we can get rid of the structure */
                event_free_signal_data(e, d);
                return;
        }

        assert(d->fd >= 0);

        if (signalfd(d->fd, &d->sigset, SFD_NONBLOCK|SFD_CLOEXEC) < 0)
                log_debug_errno(errno, "Failed to unset signal bit, ignoring: %m");
}

static void event_gc_signal_data(sd_event *e, const int64_t *priority, int sig) {
        struct signal_data *d;
        static const int64_t zero_priority = 0;

        assert(e);

        /* Rechecks if the specified signal is still something we are
         * interested in. If not, we'll unmask it, and possibly drop
         * the signalfd for it. */

        if (sig == SIGCHLD &&
            e->n_enabled_child_sources > 0)
                return;

        if (e->signal_sources &&
            e->signal_sources[sig] &&
            e->signal_sources[sig]->enabled != SD_EVENT_OFF)
                return;

        /*
         * The specified signal might be enabled in three different queues:
         *
         * 1) the one that belongs to the priority passed (if it is non-NULL)
         * 2) the one that belongs to the priority of the event source of the signal (if there is one)
         * 3) the 0 priority (to cover the SIGCHLD case)
         *
         * Hence, let's remove it from all three here.
         */

        if (priority) {
                d = hashmap_get(e->signal_data, priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        if (e->signal_sources && e->signal_sources[sig]) {
                d = hashmap_get(e->signal_data, &e->signal_sources[sig]->priority);
                if (d)
                        event_unmask_signal_data(e, d, sig);
        }

        d = hashmap_get(e->signal_data, &zero_priority);
        if (d)
                event_unmask_signal_data(e, d, sig);
}

static void source_disconnect(sd_event_source *s) {
        sd_event *event;

        assert(s);

        if (!s->event)
                return;

        assert(s->event->n_sources > 0);

        switch (s->type) {

        case SOURCE_IO:
                if (s->io.fd >= 0)
                        source_io_unregister(s);

                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM: {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_remove(d->earliest, s, &s->time.earliest_index);
                prioq_remove(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
                break;
        }

        case SOURCE_SIGNAL:
                if (s->signal.sig > 0) {

                        if (s->event->signal_sources)
                                s->event->signal_sources[s->signal.sig] = NULL;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                }

                break;

        case SOURCE_CHILD:
                if (s->child.pid > 0) {
                        if (s->enabled != SD_EVENT_OFF) {
                                assert(s->event->n_enabled_child_sources > 0);
                                s->event->n_enabled_child_sources--;
                        }

                        (void) hashmap_remove(s->event->child_sources, PID_TO_PTR(s->child.pid));
                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                }

                break;

        case SOURCE_DEFER:
                /* nothing */
                break;

        case SOURCE_POST:
                set_remove(s->event->post_sources, s);
                break;

        case SOURCE_EXIT:
                prioq_remove(s->event->exit, s, &s->exit.prioq_index);
                break;

        case SOURCE_INOTIFY: {
                struct inode_data *inode_data;

                inode_data = s->inotify.inode_data;
                if (inode_data) {
                        struct inotify_data *inotify_data;
                        assert_se(inotify_data = inode_data->inotify_data);

                        /* Detach this event source from the inode object */
                        LIST_REMOVE(inotify.by_inode_data, inode_data->event_sources, s);
                        s->inotify.inode_data = NULL;

                        if (s->pending) {
                                assert(inotify_data->n_pending > 0);
                                inotify_data->n_pending--;
                        }

                        /* Note that we don't reduce the inotify mask for the watch descriptor here if the inode
                         * continues to be watched. That's because inotify doesn't really have an API for that: we
                         * can only change watch masks with access to the original inode either by fd or by path. But
                         * paths aren't stable, and keeping an O_PATH fd open all the time would mean wasting an fd
                         * continuously and keeping the mount busy which we can't really do. We could reconstruct the
                         * original inode from /proc/self/fdinfo/$INOTIFY_FD (as all watch descriptors are listed
                         * there), but given the need for open_by_handle_at() which is privileged and not universally
                         * available this would be quite an incomplete solution. Hence we go the other way, leave the
                         * mask set, even if it is not minimized now, and ignore all events we aren't interested in
                         * anymore after reception. Yes, this sucks, but … Linux … */

                        /* Maybe release the inode data (and its inotify) */
                        event_gc_inode_data(s->event, inode_data);
                }

                break;
        }

        default:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        if (s->pending)
                prioq_remove(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        event = s->event;

        s->type = _SOURCE_EVENT_SOURCE_TYPE_INVALID;
        s->event = NULL;
        LIST_REMOVE(sources, event->sources, s);
        event->n_sources--;

        if (!s->floating)
                sd_event_unref(event);
}

static void source_free(sd_event_source *s) {
        assert(s);

        source_disconnect(s);

        if (s->type == SOURCE_IO && s->io.owned)
                s->io.fd = safe_close(s->io.fd);

        if (s->destroy_callback)
                s->destroy_callback(s->userdata);

        free(s->description);
        free(s);
}
DEFINE_TRIVIAL_CLEANUP_FUNC(sd_event_source*, source_free);

static int source_set_pending(sd_event_source *s, bool b) {
        int r;

        assert(s);
        assert(s->type != SOURCE_EXIT);

        if (s->pending == b)
                return 0;

        s->pending = b;

        if (b) {
                s->pending_iteration = s->event->iteration;

                r = prioq_put(s->event->pending, s, &s->pending_index);
                if (r < 0) {
                        s->pending = false;
                        return r;
                }
        } else
                assert_se(prioq_remove(s->event->pending, s, &s->pending_index));

        if (EVENT_SOURCE_IS_TIME(s->type)) {
                struct clock_data *d;

                d = event_get_clock_data(s->event, s->type);
                assert(d);

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        if (s->type == SOURCE_SIGNAL && !b) {
                struct signal_data *d;

                d = hashmap_get(s->event->signal_data, &s->priority);
                if (d && d->current == s)
                        d->current = NULL;
        }

        if (s->type == SOURCE_INOTIFY) {

                assert(s->inotify.inode_data);
                assert(s->inotify.inode_data->inotify_data);

                if (b)
                        s->inotify.inode_data->inotify_data->n_pending++;
                else {
                        assert(s->inotify.inode_data->inotify_data->n_pending > 0);
                        s->inotify.inode_data->inotify_data->n_pending--;
                }
        }

        return 0;
}

static sd_event_source *source_new(sd_event *e, bool floating, EventSourceType type) {
        sd_event_source *s;

        assert(e);

        s = new(sd_event_source, 1);
        if (!s)
                return NULL;

        *s = (struct sd_event_source) {
                .n_ref = 1,
                .event = e,
                .floating = floating,
                .type = type,
                .pending_index = PRIOQ_IDX_NULL,
                .prepare_index = PRIOQ_IDX_NULL,
        };

        if (!floating)
                sd_event_ref(e);

        LIST_PREPEND(sources, e->sources, s);
        e->n_sources++;

        return s;
}

_public_ int sd_event_add_io(
                sd_event *e,
                sd_event_source **ret,
                int fd,
                uint32_t events,
                sd_event_io_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(fd >= 0, -EBADF);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_IO);
        if (!s)
                return -ENOMEM;

        s->wakeup = WAKEUP_EVENT_SOURCE;
        s->io.fd = fd;
        s->io.events = events;
        s->io.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = source_io_register(s, s->enabled, events);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

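/* Illustrative usage sketch (not upstream code): registering one of our own
 * file descriptors (socket, pipe, ...) with the function above; on_io is a
 * hypothetical handler:
 *
 *     static int on_io(sd_event_source *s, int fd, uint32_t revents, void *userdata) {
 *             char buf[256];
 *             ssize_t n = read(fd, buf, sizeof(buf));  // revents has EPOLLIN set
 *             return n < 0 ? -errno : 0;               // on error the loop disables the source
 *     }
 *
 *     r = sd_event_add_io(e, &src, fd, EPOLLIN, on_io, NULL);
 *
 * The source starts out enabled (SD_EVENT_ON) and level-triggered unless
 * EPOLLET is included in the mask. */
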
static void initialize_perturb(sd_event *e) {
        sd_id128_t bootid = {};

        /* When we sleep for longer, we try to realign the wakeup to
           the same time within each minute/second/250ms, so that
           events all across the system can be coalesced into a single
           CPU wakeup. However, let's take some system-specific
           randomness for this value, so that in a network of systems
           with synced clocks timer events are distributed a
           bit. Here, we calculate a perturbation usec offset from the
           boot ID. */

        if (_likely_(e->perturb != USEC_INFINITY))
                return;

        if (sd_id128_get_boot(&bootid) >= 0)
                e->perturb = (bootid.qwords[0] ^ bootid.qwords[1]) % USEC_PER_MINUTE;
}

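/* Worked example (illustrative): suppose the boot ID hashes to
 * perturb = 123456. A timer that re-arms itself every minute is then aligned
 * to fire at second 0.123456 of each minute: the same offset for every event
 * loop on this machine (so their wakeups coalesce), but a different offset on
 * other machines, so a fleet with synchronized clocks doesn't wake up in
 * lockstep at :00. */
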
static int event_setup_timer_fd(
                sd_event *e,
                struct clock_data *d,
                clockid_t clock) {

        struct epoll_event ev;
        int r, fd;

        assert(e);
        assert(d);

        if (_likely_(d->fd >= 0))
                return 0;

        fd = timerfd_create(clock, TFD_NONBLOCK|TFD_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, fd, &ev);
        if (r < 0) {
                safe_close(fd);
                return -errno;
        }

        d->fd = fd;
        return 0;
}

static int time_exit_callback(sd_event_source *s, uint64_t usec, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_time(
                sd_event *e,
                sd_event_source **ret,
                clockid_t clock,
                uint64_t usec,
                uint64_t accuracy,
                sd_event_time_handler_t callback,
                void *userdata) {

        EventSourceType type;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct clock_data *d;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(accuracy != (uint64_t) -1, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!clock_supported(clock)) /* Checks whether the kernel supports the clock */
                return -EOPNOTSUPP;

        type = clock_to_event_source_type(clock); /* checks whether sd-event supports this clock */
        if (type < 0)
                return -EOPNOTSUPP;

        if (!callback)
                callback = time_exit_callback;

        d = event_get_clock_data(e, type);
        assert(d);

        r = prioq_ensure_allocated(&d->earliest, earliest_time_prioq_compare);
        if (r < 0)
                return r;

        r = prioq_ensure_allocated(&d->latest, latest_time_prioq_compare);
        if (r < 0)
                return r;

        if (d->fd < 0) {
                r = event_setup_timer_fd(e, d, clock);
                if (r < 0)
                        return r;
        }

        s = source_new(e, !ret, type);
        if (!s)
                return -ENOMEM;

        s->time.next = usec;
        s->time.accuracy = accuracy == 0 ? DEFAULT_ACCURACY_USEC : accuracy;
        s->time.callback = callback;
        s->time.earliest_index = s->time.latest_index = PRIOQ_IDX_NULL;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        d->needs_rearm = true;

        r = prioq_put(d->earliest, s, &s->time.earliest_index);
        if (r < 0)
                return r;

        r = prioq_put(d->latest, s, &s->time.latest_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

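/* Illustrative usage sketch (not upstream code): a one-shot timer 5s from
 * now on CLOCK_MONOTONIC with 100ms of coalescing slack; on_time is a
 * hypothetical handler:
 *
 *     static int on_time(sd_event_source *s, uint64_t usec, void *userdata) {
 *             return 0;                                // one-shot: stays disabled afterwards
 *     }
 *
 *     uint64_t now_usec;
 *     sd_event_now(e, CLOCK_MONOTONIC, &now_usec);
 *     r = sd_event_add_time(e, &src, CLOCK_MONOTONIC,
 *                           now_usec + 5 * USEC_PER_SEC, // absolute trigger time
 *                           100 * USEC_PER_MSEC,         // accuracy window
 *                           on_time, NULL);
 *
 * A NULL callback makes the timer call sd_event_exit() with the userdata
 * pointer as exit code, via time_exit_callback() above. */
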
static int signal_exit_callback(sd_event_source *s, const struct signalfd_siginfo *si, void *userdata) {
        assert(s);

        return sd_event_exit(sd_event_source_get_event(s), PTR_TO_INT(userdata));
}

_public_ int sd_event_add_signal(
                sd_event *e,
                sd_event_source **ret,
                int sig,
                sd_event_signal_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct signal_data *d;
        sigset_t ss;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(SIGNAL_VALID(sig), -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!callback)
                callback = signal_exit_callback;

        r = pthread_sigmask(SIG_SETMASK, NULL, &ss);
        if (r != 0)
                return -r;

        if (!sigismember(&ss, sig))
                return -EBUSY;

        if (!e->signal_sources) {
                e->signal_sources = new0(sd_event_source*, _NSIG);
                if (!e->signal_sources)
                        return -ENOMEM;
        } else if (e->signal_sources[sig])
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_SIGNAL);
        if (!s)
                return -ENOMEM;

        s->signal.sig = sig;
        s->signal.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        e->signal_sources[sig] = s;

        r = event_make_signal_data(e, sig, &d);
        if (r < 0)
                return r;

        /* Use the signal name as description for the event source by default */
        (void) sd_event_source_set_description(s, signal_to_string(sig));

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

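/* Illustrative usage sketch (not upstream code): the function above returns
 * -EBUSY unless the signal is already blocked in the calling thread, hence
 * the mask setup comes first:
 *
 *     sigset_t ss;
 *     sigemptyset(&ss);
 *     sigaddset(&ss, SIGTERM);
 *     sigprocmask(SIG_BLOCK, &ss, NULL);       // or pthread_sigmask() in threaded code
 *
 *     r = sd_event_add_signal(e, &src, SIGTERM, NULL, NULL);
 *
 * A NULL callback means "exit the loop when this signal arrives", courtesy
 * of signal_exit_callback() above. */
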
_public_ int sd_event_add_child(
                sd_event *e,
                sd_event_source **ret,
                pid_t pid,
                int options,
                sd_event_child_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(pid > 1, -EINVAL);
        assert_return(!(options & ~(WEXITED|WSTOPPED|WCONTINUED)), -EINVAL);
        assert_return(options != 0, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = hashmap_ensure_allocated(&e->child_sources, NULL);
        if (r < 0)
                return r;

        if (hashmap_contains(e->child_sources, PID_TO_PTR(pid)))
                return -EBUSY;

        s = source_new(e, !ret, SOURCE_CHILD);
        if (!s)
                return -ENOMEM;

        s->child.pid = pid;
        s->child.options = options;
        s->child.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = hashmap_put(e->child_sources, PID_TO_PTR(pid), s);
        if (r < 0)
                return r;

        e->n_enabled_child_sources++;

        r = event_make_signal_data(e, SIGCHLD, NULL);
        if (r < 0) {
                e->n_enabled_child_sources--;
                return r;
        }

        e->need_process_child = true;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

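/* Illustrative usage sketch (not upstream code): watching a child we forked
 * ourselves. Child exits are delivered through the SIGCHLD signalfd set up
 * via event_make_signal_data() above, so SIGCHLD must be blocked before the
 * child can die; on_child is a hypothetical handler:
 *
 *     static int on_child(sd_event_source *s, const siginfo_t *si, void *userdata) {
 *             // si->si_pid and si->si_status describe the exited child
 *             return 0;
 *     }
 *
 *     pid_t pid = fork();
 *     if (pid == 0)
 *             _exit(EXIT_SUCCESS);             // child
 *
 *     r = sd_event_add_child(e, &src, pid, WEXITED, on_child, NULL);
 */
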
_public_ int sd_event_add_defer(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        s = source_new(e, !ret, SOURCE_DEFER);
        if (!s)
                return -ENOMEM;

        s->defer.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ONESHOT;

        r = source_set_pending(s, true);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

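/* Illustrative usage sketch (not upstream code): because the function above
 * marks the source pending right away and enables it SD_EVENT_ONESHOT, the
 * handler runs once on the next loop iteration and is then disabled, unless
 * it re-enables itself with sd_event_source_set_enabled():
 *
 *     r = sd_event_add_defer(e, &src, on_defer, NULL);  // on_defer as in the earlier sketch
 */
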
_public_ int sd_event_add_post(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = set_ensure_allocated(&e->post_sources, NULL);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_POST);
        if (!s)
                return -ENOMEM;

        s->post.callback = callback;
        s->userdata = userdata;
        s->enabled = SD_EVENT_ON;

        r = set_put(e->post_sources, s);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

_public_ int sd_event_add_exit(
                sd_event *e,
                sd_event_source **ret,
                sd_event_handler_t callback,
                void *userdata) {

        _cleanup_(source_freep) sd_event_source *s = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        r = prioq_ensure_allocated(&e->exit, exit_prioq_compare);
        if (r < 0)
                return r;

        s = source_new(e, !ret, SOURCE_EXIT);
        if (!s)
                return -ENOMEM;

        s->exit.callback = callback;
        s->userdata = userdata;
        s->exit.prioq_index = PRIOQ_IDX_NULL;
        s->enabled = SD_EVENT_ONESHOT;

        r = prioq_put(s->event->exit, s, &s->exit.prioq_index);
        if (r < 0)
                return r;

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

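/* Illustrative usage sketch (not upstream code): exit sources are dispatched
 * only once sd_event_exit() has been called, in priority order, which makes
 * them a natural place for shutdown work:
 *
 *     static int on_loop_exit(sd_event_source *s, void *userdata) {
 *             // runs while the loop is winding down
 *             return 0;
 *     }
 *
 *     r = sd_event_add_exit(e, &src, on_loop_exit, NULL);
 */
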
static void event_free_inotify_data(sd_event *e, struct inotify_data *d) {
        assert(e);

        if (!d)
                return;

        assert(hashmap_isempty(d->inodes));
        assert(hashmap_isempty(d->wd));

        if (d->buffer_filled > 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);

        hashmap_free(d->inodes);
        hashmap_free(d->wd);

        assert_se(hashmap_remove(e->inotify_data, &d->priority) == d);

        if (d->fd >= 0) {
                if (epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, d->fd, NULL) < 0)
                        log_debug_errno(errno, "Failed to remove inotify fd from epoll, ignoring: %m");

                safe_close(d->fd);
        }
        free(d);
}

static int event_make_inotify_data(
                sd_event *e,
                int64_t priority,
                struct inotify_data **ret) {

        _cleanup_close_ int fd = -1;
        struct inotify_data *d;
        struct epoll_event ev;
        int r;

        assert(e);

        d = hashmap_get(e->inotify_data, &priority);
        if (d) {
                if (ret)
                        *ret = d;
                return 0;
        }

        fd = inotify_init1(IN_NONBLOCK|IN_CLOEXEC);
        if (fd < 0)
                return -errno;

        fd = fd_move_above_stdio(fd);

        r = hashmap_ensure_allocated(&e->inotify_data, &uint64_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inotify_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inotify_data) {
                .wakeup = WAKEUP_INOTIFY_DATA,
                .fd = TAKE_FD(fd),
                .priority = priority,
        };

        r = hashmap_put(e->inotify_data, &d->priority, d);
        if (r < 0) {
                d->fd = safe_close(d->fd);
                free(d);
                return r;
        }

        ev = (struct epoll_event) {
                .events = EPOLLIN,
                .data.ptr = d,
        };

        if (epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, d->fd, &ev) < 0) {
                r = -errno;
                d->fd = safe_close(d->fd); /* let's close this ourselves, as event_free_inotify_data() would otherwise
                                            * remove the fd from the epoll first, which we don't want as we couldn't
                                            * add it in the first place. */
                event_free_inotify_data(e, d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static int inode_data_compare(const struct inode_data *x, const struct inode_data *y) {
        int r;

        assert(x);
        assert(y);

        r = CMP(x->dev, y->dev);
        if (r != 0)
                return r;

        return CMP(x->ino, y->ino);
}

static void inode_data_hash_func(const struct inode_data *d, struct siphash *state) {
        assert(d);

        siphash24_compress(&d->dev, sizeof(d->dev), state);
        siphash24_compress(&d->ino, sizeof(d->ino), state);
}

DEFINE_PRIVATE_HASH_OPS(inode_data_hash_ops, struct inode_data, inode_data_hash_func, inode_data_compare);

static void event_free_inode_data(
                sd_event *e,
                struct inode_data *d) {

        assert(e);

        if (!d)
                return;

        assert(!d->event_sources);

        if (d->fd >= 0) {
                LIST_REMOVE(to_close, e->inode_data_to_close, d);
                safe_close(d->fd);
        }

        if (d->inotify_data) {

                if (d->wd >= 0) {
                        if (d->inotify_data->fd >= 0) {
                                /* So here's a problem. At the time this runs the watch descriptor might already be
                                 * invalidated, because an IN_IGNORED event might be queued right at the moment we
                                 * enter the syscall. Hence, whenever we get EINVAL, ignore it entirely, since this
                                 * is quite likely to happen. */

                                if (inotify_rm_watch(d->inotify_data->fd, d->wd) < 0 && errno != EINVAL)
                                        log_debug_errno(errno, "Failed to remove watch descriptor %i from inotify, ignoring: %m", d->wd);
                        }

                        assert_se(hashmap_remove(d->inotify_data->wd, INT_TO_PTR(d->wd)) == d);
                }

                assert_se(hashmap_remove(d->inotify_data->inodes, d) == d);
        }

        free(d);
}

static void event_gc_inode_data(
                sd_event *e,
                struct inode_data *d) {

        struct inotify_data *inotify_data;

        assert(e);

        if (!d)
                return;

        if (d->event_sources)
                return;

        inotify_data = d->inotify_data;
        event_free_inode_data(e, d);

        if (inotify_data && hashmap_isempty(inotify_data->inodes))
                event_free_inotify_data(e, inotify_data);
}

static int event_make_inode_data(
                sd_event *e,
                struct inotify_data *inotify_data,
                dev_t dev,
                ino_t ino,
                struct inode_data **ret) {

        struct inode_data *d, key;
        int r;

        assert(e);
        assert(inotify_data);

        key = (struct inode_data) {
                .ino = ino,
                .dev = dev,
        };

        d = hashmap_get(inotify_data->inodes, &key);
        if (d) {
                if (ret)
                        *ret = d;

                return 0;
        }

        r = hashmap_ensure_allocated(&inotify_data->inodes, &inode_data_hash_ops);
        if (r < 0)
                return r;

        d = new(struct inode_data, 1);
        if (!d)
                return -ENOMEM;

        *d = (struct inode_data) {
                .dev = dev,
                .ino = ino,
                .wd = -1,
                .fd = -1,
                .inotify_data = inotify_data,
        };

        r = hashmap_put(inotify_data->inodes, d, d);
        if (r < 0) {
                free(d);
                return r;
        }

        if (ret)
                *ret = d;

        return 1;
}

static uint32_t inode_data_determine_mask(struct inode_data *d) {
        bool excl_unlink = true;
        uint32_t combined = 0;
        sd_event_source *s;

        assert(d);

        /* Combines the watch masks of all event sources watching this inode. We generally just OR them together, but
         * the IN_EXCL_UNLINK flag is ANDed instead.
         *
         * Note that we add all sources to the mask here, regardless whether enabled, disabled or oneshot. That's
         * because we cannot change the mask anymore after the event source was created once, since the kernel has no
         * API for that. Hence we need to subscribe to the maximum mask we ever might be interested in, and suppress
         * events we don't care for client-side. */

        LIST_FOREACH(inotify.by_inode_data, s, d->event_sources) {

                if ((s->inotify.mask & IN_EXCL_UNLINK) == 0)
                        excl_unlink = false;

                combined |= s->inotify.mask;
        }

        return (combined & ~(IN_ONESHOT|IN_DONT_FOLLOW|IN_ONLYDIR|IN_EXCL_UNLINK)) | (excl_unlink ? IN_EXCL_UNLINK : 0);
}

static int inode_data_realize_watch(sd_event *e, struct inode_data *d) {
        uint32_t combined_mask;
        int wd, r;

        assert(d);
        assert(d->fd >= 0);

        combined_mask = inode_data_determine_mask(d);

        if (d->wd >= 0 && combined_mask == d->combined_mask)
                return 0;

        r = hashmap_ensure_allocated(&d->inotify_data->wd, NULL);
        if (r < 0)
                return r;

        wd = inotify_add_watch_fd(d->inotify_data->fd, d->fd, combined_mask);
        if (wd < 0)
                return wd; /* inotify_add_watch_fd() already returns an errno-style negative error */

        if (d->wd < 0) {
                r = hashmap_put(d->inotify_data->wd, INT_TO_PTR(wd), d);
                if (r < 0) {
                        (void) inotify_rm_watch(d->inotify_data->fd, wd);
                        return r;
                }

                d->wd = wd;

        } else if (d->wd != wd) {

                log_debug("Weird, the watch descriptor we already knew for this inode changed?");
                (void) inotify_rm_watch(d->inotify_data->fd, wd);
                return -EINVAL;
        }

        d->combined_mask = combined_mask;
        return 1;
}

_public_ int sd_event_add_inotify(
                sd_event *e,
                sd_event_source **ret,
                const char *path,
                uint32_t mask,
                sd_event_inotify_handler_t callback,
                void *userdata) {

        struct inotify_data *inotify_data = NULL;
        struct inode_data *inode_data = NULL;
        _cleanup_close_ int fd = -1;
        _cleanup_(source_freep) sd_event_source *s = NULL;
        struct stat st;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(path, -EINVAL);
        assert_return(callback, -EINVAL);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        /* Refuse IN_MASK_ADD since we coalesce watches on the same inode, and hence really don't want to merge
         * masks. Or in other words, this whole code exists only to manage IN_MASK_ADD type operations for you, hence
         * the user can't use them for us. */
        if (mask & IN_MASK_ADD)
                return -EINVAL;

        fd = open(path, O_PATH|O_CLOEXEC|
                        (mask & IN_ONLYDIR ? O_DIRECTORY : 0)|
                        (mask & IN_DONT_FOLLOW ? O_NOFOLLOW : 0));
        if (fd < 0)
                return -errno;

        if (fstat(fd, &st) < 0)
                return -errno;

        s = source_new(e, !ret, SOURCE_INOTIFY);
        if (!s)
                return -ENOMEM;

        s->enabled = mask & IN_ONESHOT ? SD_EVENT_ONESHOT : SD_EVENT_ON;
        s->inotify.mask = mask;
        s->inotify.callback = callback;
        s->userdata = userdata;

        /* Allocate an inotify object for this priority, and an inode object within it */
        r = event_make_inotify_data(e, SD_EVENT_PRIORITY_NORMAL, &inotify_data);
        if (r < 0)
                return r;

        r = event_make_inode_data(e, inotify_data, st.st_dev, st.st_ino, &inode_data);
        if (r < 0) {
                event_free_inotify_data(e, inotify_data);
                return r;
        }

        /* Keep the O_PATH fd around until the first iteration of the loop, so that we can still change the priority of
         * the event source, until then, for which we need the original inode. */
        if (inode_data->fd < 0) {
                inode_data->fd = TAKE_FD(fd);
                LIST_PREPEND(to_close, e->inode_data_to_close, inode_data);
        }

        /* Link our event source to the inode data object */
        LIST_PREPEND(inotify.by_inode_data, inode_data->event_sources, s);
        s->inotify.inode_data = inode_data;

        /* Actually realize the watch now */
        r = inode_data_realize_watch(e, inode_data);
        if (r < 0)
                return r;

        (void) sd_event_source_set_description(s, path);

        if (ret)
                *ret = s;
        TAKE_PTR(s);

        return 0;
}

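/* Illustrative usage sketch (not upstream code): watching a directory for
 * new files. The mask is plain inotify vocabulary; IN_MASK_ADD is rejected
 * above because watches on the same inode are coalesced internally:
 *
 *     static int on_inotify(sd_event_source *s, const struct inotify_event *ev, void *userdata) {
 *             // ev->mask and ev->name describe what happened
 *             return 0;
 *     }
 *
 *     r = sd_event_add_inotify(e, &src, "/tmp", IN_CREATE|IN_ONLYDIR, on_inotify, NULL);
 */
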
static sd_event_source* event_source_free(sd_event_source *s) {
        if (!s)
                return NULL;

        /* Here's a special hack: when we are called from a
         * dispatch handler we won't free the event source
         * immediately, but we will detach the fd from the
         * epoll. This way it is safe for the caller to unref
         * the event source and immediately close the fd, but
         * we still retain a valid event source object after
         * the callback. */

        if (s->dispatching) {
                if (s->type == SOURCE_IO)
                        source_io_unregister(s);

                source_disconnect(s);
        } else
                source_free(s);

        return NULL;
}

DEFINE_PUBLIC_TRIVIAL_REF_UNREF_FUNC(sd_event_source, sd_event_source, event_source_free);

_public_ int sd_event_source_set_description(sd_event_source *s, const char *description) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return free_and_strdup(&s->description, description);
}

_public_ int sd_event_source_get_description(sd_event_source *s, const char **description) {
        assert_return(s, -EINVAL);
        assert_return(description, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (!s->description)
                return -ENXIO;

        *description = s->description;
        return 0;
}

_public_ sd_event *sd_event_source_get_event(sd_event_source *s) {
        assert_return(s, NULL);

        return s->event;
}

_public_ int sd_event_source_get_pending(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->pending;
}

_public_ int sd_event_source_get_io_fd(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->io.fd;
}

_public_ int sd_event_source_set_io_fd(sd_event_source *s, int fd) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(fd >= 0, -EBADF);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->io.fd == fd)
                return 0;

        if (s->enabled == SD_EVENT_OFF) {
                s->io.fd = fd;
                s->io.registered = false;
        } else {
                int saved_fd;

                saved_fd = s->io.fd;
                assert(s->io.registered);

                s->io.fd = fd;
                s->io.registered = false;

                r = source_io_register(s, s->enabled, s->io.events);
                if (r < 0) {
                        s->io.fd = saved_fd;
                        s->io.registered = true;
                        return r;
                }

                epoll_ctl(s->event->epoll_fd, EPOLL_CTL_DEL, saved_fd, NULL);
        }

        return 0;
}

_public_ int sd_event_source_get_io_fd_own(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        return s->io.owned;
}

_public_ int sd_event_source_set_io_fd_own(sd_event_source *s, int own) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);

        s->io.owned = own;
        return 0;
}

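/* Illustrative usage sketch (not upstream code): with ownership enabled,
 * source_free() above closes the fd when the source goes away, so the caller
 * can forget its copy right after registration:
 *
 *     r = sd_event_add_io(e, &src, fd, EPOLLIN, on_io, NULL);
 *     if (r >= 0)
 *             (void) sd_event_source_set_io_fd_own(src, true);
 *     // no separate close(fd) anymore; dropping the source releases it
 */
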
_public_ int sd_event_source_get_io_events(sd_event_source *s, uint32_t* events) {
        assert_return(s, -EINVAL);
        assert_return(events, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *events = s->io.events;
        return 0;
}

_public_ int sd_event_source_set_io_events(sd_event_source *s, uint32_t events) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(!(events & ~(EPOLLIN|EPOLLOUT|EPOLLRDHUP|EPOLLPRI|EPOLLERR|EPOLLHUP|EPOLLET)), -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* edge-triggered updates are never skipped, so we can reset edges */
        if (s->io.events == events && !(events & EPOLLET))
                return 0;

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (s->enabled != SD_EVENT_OFF) {
                r = source_io_register(s, s->enabled, events);
                if (r < 0)
                        return r;
        }

        s->io.events = events;

        return 0;
}

_public_ int sd_event_source_get_io_revents(sd_event_source *s, uint32_t* revents) {
        assert_return(s, -EINVAL);
        assert_return(revents, -EINVAL);
        assert_return(s->type == SOURCE_IO, -EDOM);
        assert_return(s->pending, -ENODATA);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *revents = s->io.revents;
        return 0;
}

_public_ int sd_event_source_get_signal(sd_event_source *s) {
        assert_return(s, -EINVAL);
        assert_return(s->type == SOURCE_SIGNAL, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        return s->signal.sig;
}

_public_ int sd_event_source_get_priority(sd_event_source *s, int64_t *priority) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *priority = s->priority;
        return 0;
}

_public_ int sd_event_source_set_priority(sd_event_source *s, int64_t priority) {
        bool rm_inotify = false, rm_inode = false;
        struct inotify_data *new_inotify_data = NULL;
        struct inode_data *new_inode_data = NULL;
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->priority == priority)
                return 0;

        if (s->type == SOURCE_INOTIFY) {
                struct inode_data *old_inode_data;

                assert(s->inotify.inode_data);
                old_inode_data = s->inotify.inode_data;

1863
1864 /* We need the original fd to change the priority. If we don't have it we can't change the priority,
1865 * anymore. Note that we close any fds when entering the next event loop iteration, i.e. for inotify
1866 * events we allow priority changes only until the first following iteration. */
1867 if (old_inode_data->fd < 0)
1868 return -EOPNOTSUPP;
1869
1870 r = event_make_inotify_data(s->event, priority, &new_inotify_data);
1871 if (r < 0)
1872 return r;
1873 rm_inotify = r > 0;
1874
1875 r = event_make_inode_data(s->event, new_inotify_data, old_inode_data->dev, old_inode_data->ino, &new_inode_data);
1876 if (r < 0)
1877 goto fail;
1878 rm_inode = r > 0;
1879
1880 if (new_inode_data->fd < 0) {
1881 /* Duplicate the fd for the new inode object if we don't have any yet */
1882 new_inode_data->fd = fcntl(old_inode_data->fd, F_DUPFD_CLOEXEC, 3);
1883 if (new_inode_data->fd < 0) {
1884 r = -errno;
1885 goto fail;
1886 }
1887
1888 LIST_PREPEND(to_close, s->event->inode_data_to_close, new_inode_data);
1889 }
1890
1891 /* Move the event source to the new inode data structure */
1892 LIST_REMOVE(inotify.by_inode_data, old_inode_data->event_sources, s);
1893 LIST_PREPEND(inotify.by_inode_data, new_inode_data->event_sources, s);
1894 s->inotify.inode_data = new_inode_data;
1895
1896 /* Now create the new watch */
1897 r = inode_data_realize_watch(s->event, new_inode_data);
1898 if (r < 0) {
1899 /* Move it back */
1900 LIST_REMOVE(inotify.by_inode_data, new_inode_data->event_sources, s);
1901 LIST_PREPEND(inotify.by_inode_data, old_inode_data->event_sources, s);
1902 s->inotify.inode_data = old_inode_data;
1903 goto fail;
1904 }
1905
1906 s->priority = priority;
1907
1908 event_gc_inode_data(s->event, old_inode_data);
1909
1910 } else if (s->type == SOURCE_SIGNAL && s->enabled != SD_EVENT_OFF) {
9da4cb2b
LP
1911 struct signal_data *old, *d;
1912
1913 /* Move us from the signalfd belonging to the old
1914 * priority to the signalfd of the new priority */
1915
1916 assert_se(old = hashmap_get(s->event->signal_data, &s->priority));
1917
1918 s->priority = priority;
1919
1920 r = event_make_signal_data(s->event, s->signal.sig, &d);
1921 if (r < 0) {
1922 s->priority = old->priority;
1923 return r;
1924 }
1925
1926 event_unmask_signal_data(s->event, old, s->signal.sig);
1927 } else
1928 s->priority = priority;
fd38203a
LP
1929
1930 if (s->pending)
c2ba3ad6 1931 prioq_reshuffle(s->event->pending, s, &s->pending_index);
fd38203a
LP
1932
1933 if (s->prepare)
c2ba3ad6 1934 prioq_reshuffle(s->event->prepare, s, &s->prepare_index);
fd38203a 1935
6203e07a
LP
1936 if (s->type == SOURCE_EXIT)
1937 prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
305f78bf 1938
fd38203a 1939 return 0;
97ef5391
LP
1940
1941fail:
1942 if (rm_inode)
1943 event_free_inode_data(s->event, new_inode_data);
1944
1945 if (rm_inotify)
1946 event_free_inotify_data(s->event, new_inotify_data);
1947
1948 return r;
fd38203a
LP
1949}
1950
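/* Illustrative sketch, not part of the original file: how a caller might use the setter
 * above. Lower priority values are dispatched first; the SD_EVENT_PRIORITY_* constants come
 * from sd-event.h. The source is assumed to have been created earlier, e.g. with
 * sd_event_add_io(). */
static int example_prioritize_source(sd_event_source *source) {
        /* Dispatch this source ahead of sources left at SD_EVENT_PRIORITY_NORMAL (0) */
        return sd_event_source_set_priority(source, SD_EVENT_PRIORITY_IMPORTANT);
}
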
_public_ int sd_event_source_get_enabled(sd_event_source *s, int *m) {
        assert_return(s, -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (m)
                *m = s->enabled;
        return s->enabled != SD_EVENT_OFF;
}

_public_ int sd_event_source_set_enabled(sd_event_source *s, int m) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(IN_SET(m, SD_EVENT_OFF, SD_EVENT_ON, SD_EVENT_ONESHOT), -EINVAL);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        /* If we are dead anyway, we are fine with turning off
         * sources, but everything else needs to fail. */
        if (s->event->state == SD_EVENT_FINISHED)
                return m == SD_EVENT_OFF ? 0 : -ESTALE;

        if (s->enabled == m)
                return 0;

        if (m == SD_EVENT_OFF) {

                /* Unset the pending flag when this event source is disabled */
                if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        source_io_unregister(s);
                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:
                        s->enabled = m;

                        event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                        break;

                case SOURCE_CHILD:
                        s->enabled = m;

                        assert(s->event->n_enabled_child_sources > 0);
                        s->event->n_enabled_child_sources--;

                        event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }

        } else {

                /* Unset the pending flag when this event source is enabled */
                if (s->enabled == SD_EVENT_OFF && !IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                        r = source_set_pending(s, false);
                        if (r < 0)
                                return r;
                }

                switch (s->type) {

                case SOURCE_IO:
                        r = source_io_register(s, m, s->io.events);
                        if (r < 0)
                                return r;

                        s->enabled = m;
                        break;

                case SOURCE_TIME_REALTIME:
                case SOURCE_TIME_BOOTTIME:
                case SOURCE_TIME_MONOTONIC:
                case SOURCE_TIME_REALTIME_ALARM:
                case SOURCE_TIME_BOOTTIME_ALARM: {
                        struct clock_data *d;

                        s->enabled = m;
                        d = event_get_clock_data(s->event, s->type);
                        assert(d);

                        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                        prioq_reshuffle(d->latest, s, &s->time.latest_index);
                        d->needs_rearm = true;
                        break;
                }

                case SOURCE_SIGNAL:

                        s->enabled = m;

                        r = event_make_signal_data(s->event, s->signal.sig, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                event_gc_signal_data(s->event, &s->priority, s->signal.sig);
                                return r;
                        }

                        break;

                case SOURCE_CHILD:

                        if (s->enabled == SD_EVENT_OFF)
                                s->event->n_enabled_child_sources++;

                        s->enabled = m;

                        r = event_make_signal_data(s->event, SIGCHLD, NULL);
                        if (r < 0) {
                                s->enabled = SD_EVENT_OFF;
                                s->event->n_enabled_child_sources--;
                                event_gc_signal_data(s->event, &s->priority, SIGCHLD);
                                return r;
                        }

                        break;

                case SOURCE_EXIT:
                        s->enabled = m;
                        prioq_reshuffle(s->event->exit, s, &s->exit.prioq_index);
                        break;

                case SOURCE_DEFER:
                case SOURCE_POST:
                case SOURCE_INOTIFY:
                        s->enabled = m;
                        break;

                default:
                        assert_not_reached("Wut? I shouldn't exist.");
                }
        }

        if (s->pending)
                prioq_reshuffle(s->event->pending, s, &s->pending_index);

        if (s->prepare)
                prioq_reshuffle(s->event->prepare, s, &s->prepare_index);

        return 0;
}

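/* Illustrative sketch, not part of the original file: pausing and resuming a source with the
 * function above. SD_EVENT_ONESHOT arms the source for a single dispatch, after which the
 * loop itself turns it back to SD_EVENT_OFF (see source_dispatch() further down). */
static int example_pause_and_resume(sd_event_source *source) {
        int r;

        r = sd_event_source_set_enabled(source, SD_EVENT_OFF);     /* pause */
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(source, SD_EVENT_ONESHOT); /* fire exactly once more */
}
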
_public_ int sd_event_source_get_time(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.next;
        return 0;
}

_public_ int sd_event_source_set_time(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        s->time.next = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

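/* Illustrative sketch, not part of the original file: a timer callback that re-arms itself 5s
 * in the future, using sd_event_now() so the deadline is computed from the loop's cached
 * wake-up timestamp rather than a fresh clock_gettime(). Matches the
 * sd_event_time_handler_t signature. */
static int example_timer_handler(sd_event_source *source, uint64_t usec, void *userdata) {
        uint64_t now_usec;
        int r;

        r = sd_event_now(sd_event_source_get_event(source), CLOCK_MONOTONIC, &now_usec);
        if (r < 0)
                return r;

        r = sd_event_source_set_time(source, now_usec + 5 * USEC_PER_SEC);
        if (r < 0)
                return r;

        return sd_event_source_set_enabled(source, SD_EVENT_ONESHOT);
}
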
_public_ int sd_event_source_get_time_accuracy(sd_event_source *s, uint64_t *usec) {
        assert_return(s, -EINVAL);
        assert_return(usec, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *usec = s->time.accuracy;
        return 0;
}

_public_ int sd_event_source_set_time_accuracy(sd_event_source *s, uint64_t usec) {
        struct clock_data *d;
        int r;

        assert_return(s, -EINVAL);
        assert_return(usec != (uint64_t) -1, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        r = source_set_pending(s, false);
        if (r < 0)
                return r;

        if (usec == 0)
                usec = DEFAULT_ACCURACY_USEC;

        s->time.accuracy = usec;

        d = event_get_clock_data(s->event, s->type);
        assert(d);

        prioq_reshuffle(d->latest, s, &s->time.latest_index);
        d->needs_rearm = true;

        return 0;
}

_public_ int sd_event_source_get_time_clock(sd_event_source *s, clockid_t *clock) {
        assert_return(s, -EINVAL);
        assert_return(clock, -EINVAL);
        assert_return(EVENT_SOURCE_IS_TIME(s->type), -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *clock = event_source_type_to_clock(s->type);
        return 0;
}

_public_ int sd_event_source_get_child_pid(sd_event_source *s, pid_t *pid) {
        assert_return(s, -EINVAL);
        assert_return(pid, -EINVAL);
        assert_return(s->type == SOURCE_CHILD, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *pid = s->child.pid;
        return 0;
}

_public_ int sd_event_source_get_inotify_mask(sd_event_source *s, uint32_t *mask) {
        assert_return(s, -EINVAL);
        assert_return(mask, -EINVAL);
        assert_return(s->type == SOURCE_INOTIFY, -EDOM);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        *mask = s->inotify.mask;
        return 0;
}

_public_ int sd_event_source_set_prepare(sd_event_source *s, sd_event_handler_t callback) {
        int r;

        assert_return(s, -EINVAL);
        assert_return(s->type != SOURCE_EXIT, -EDOM);
        assert_return(s->event->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(s->event), -ECHILD);

        if (s->prepare == callback)
                return 0;

        if (callback && s->prepare) {
                s->prepare = callback;
                return 0;
        }

        r = prioq_ensure_allocated(&s->event->prepare, prepare_prioq_compare);
        if (r < 0)
                return r;

        s->prepare = callback;

        if (callback) {
                r = prioq_put(s->event->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;
        } else
                prioq_remove(s->event->prepare, s, &s->prepare_index);

        return 0;
}

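/* Illustrative sketch, not part of the original file: prepare callbacks run just before the
 * loop polls, which suits sources whose readiness is only known at that point.
 * example_queue_empty() is an assumed application helper, not part of sd-event; the callback
 * matches the sd_event_handler_t signature. */
static bool example_queue_empty(void *queue); /* assumed helper, checks an application work queue */

static int example_prepare(sd_event_source *source, void *userdata) {
        /* Arm the source only if there is queued work left to process */
        return sd_event_source_set_enabled(source,
                                           example_queue_empty(userdata) ? SD_EVENT_OFF : SD_EVENT_ONESHOT);
}
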
_public_ void* sd_event_source_get_userdata(sd_event_source *s) {
        assert_return(s, NULL);

        return s->userdata;
}

_public_ void *sd_event_source_set_userdata(sd_event_source *s, void *userdata) {
        void *ret;

        assert_return(s, NULL);

        ret = s->userdata;
        s->userdata = userdata;

        return ret;
}

static usec_t sleep_between(sd_event *e, usec_t a, usec_t b) {
        usec_t c;
        assert(e);
        assert(a <= b);

        if (a <= 0)
                return 0;
        if (a >= USEC_INFINITY)
                return USEC_INFINITY;

        if (b <= a + 1)
                return a;

        initialize_perturb(e);

        /*
          Find a good time to wake up again between times a and b. We
          have two goals here:

          a) We want to wake up as seldom as possible, hence prefer
             later times over earlier times.

          b) But if we have to wake up, then let's make sure to
             dispatch as much as possible on the entire system.

          We implement this by waking up everywhere at the same time
          within any given minute if we can, synchronised via the
          perturbation value determined from the boot ID. If we can't,
          then we try to find the same spot in every 10s, then 1s and
          then 250ms steps. Otherwise, we pick the last possible time
          to wake up.
        */

        c = (b / USEC_PER_MINUTE) * USEC_PER_MINUTE + e->perturb;
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MINUTE))
                        return b;

                c -= USEC_PER_MINUTE;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_SEC*10)) * (USEC_PER_SEC*10) + (e->perturb % (USEC_PER_SEC*10));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC*10))
                        return b;

                c -= USEC_PER_SEC*10;
        }

        if (c >= a)
                return c;

        c = (b / USEC_PER_SEC) * USEC_PER_SEC + (e->perturb % USEC_PER_SEC);
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_SEC))
                        return b;

                c -= USEC_PER_SEC;
        }

        if (c >= a)
                return c;

        c = (b / (USEC_PER_MSEC*250)) * (USEC_PER_MSEC*250) + (e->perturb % (USEC_PER_MSEC*250));
        if (c >= b) {
                if (_unlikely_(c < USEC_PER_MSEC*250))
                        return b;

                c -= USEC_PER_MSEC*250;
        }

        if (c >= a)
                return c;

        return b;
}

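/* Worked example (illustrative numbers, not from the original file): with e->perturb = 7.5s
 * and a window of a = 12:00:10, b = 12:00:55 within some minute, the minute-granularity
 * candidate is
 *
 *     c = (b / 60s) * 60s + perturb = 12:00:00 + 7.5s = 12:00:07.5,
 *
 * which lies before a and is rejected. The 10s-granularity candidate is
 *
 *     c = (b / 10s) * 10s + (perturb % 10s) = 12:00:50 + 7.5s = 12:00:57.5,
 *
 * which is >= b, so 10s is subtracted, giving 12:00:47.5; that falls inside [a, b] and is
 * returned. Since the perturbation is derived from the boot ID, all loops on the same boot
 * compute the same offset and hence tend to wake up in the same instant. */
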
static int event_arm_timer(
                sd_event *e,
                struct clock_data *d) {

        struct itimerspec its = {};
        sd_event_source *a, *b;
        usec_t t;
        int r;

        assert(e);
        assert(d);

        if (!d->needs_rearm)
                return 0;
        else
                d->needs_rearm = false;

        a = prioq_peek(d->earliest);
        if (!a || a->enabled == SD_EVENT_OFF || a->time.next == USEC_INFINITY) {

                if (d->fd < 0)
                        return 0;

                if (d->next == USEC_INFINITY)
                        return 0;

                /* disarm */
                r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
                if (r < 0)
                        return -errno;

                d->next = USEC_INFINITY;
                return 0;
        }

        b = prioq_peek(d->latest);
        assert_se(b && b->enabled != SD_EVENT_OFF);

        t = sleep_between(e, a->time.next, time_event_source_latest(b));
        if (d->next == t)
                return 0;

        assert_se(d->fd >= 0);

        if (t == 0) {
                /* We don't want to disarm here, we just mean some time looooong ago. */
                its.it_value.tv_sec = 0;
                its.it_value.tv_nsec = 1;
        } else
                timespec_store(&its.it_value, t);

        r = timerfd_settime(d->fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        d->next = t;
        return 0;
}

static int process_io(sd_event *e, sd_event_source *s, uint32_t revents) {
        assert(e);
        assert(s);
        assert(s->type == SOURCE_IO);

        /* If the event source was already pending, we just OR in the
         * new revents, otherwise we reset the value. The ORing is
         * necessary to handle EPOLLONESHOT events properly where
         * readability might happen independently of writability, and
         * we need to keep track of both */

        if (s->pending)
                s->io.revents |= revents;
        else
                s->io.revents = revents;

        return source_set_pending(s, true);
}

static int flush_timer(sd_event *e, int fd, uint32_t events, usec_t *next) {
        uint64_t x;
        ssize_t ss;

        assert(e);
        assert(fd >= 0);

        assert_return(events == EPOLLIN, -EIO);

        ss = read(fd, &x, sizeof(x));
        if (ss < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        if (_unlikely_(ss != sizeof(x)))
                return -EIO;

        if (next)
                *next = USEC_INFINITY;

        return 0;
}

static int process_timer(
                sd_event *e,
                usec_t n,
                struct clock_data *d) {

        sd_event_source *s;
        int r;

        assert(e);
        assert(d);

        for (;;) {
                s = prioq_peek(d->earliest);
                if (!s ||
                    s->time.next > n ||
                    s->enabled == SD_EVENT_OFF ||
                    s->pending)
                        break;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                prioq_reshuffle(d->earliest, s, &s->time.earliest_index);
                prioq_reshuffle(d->latest, s, &s->time.latest_index);
                d->needs_rearm = true;
        }

        return 0;
}

static int process_child(sd_event *e) {
        sd_event_source *s;
        Iterator i;
        int r;

        assert(e);

        e->need_process_child = false;

        /*
          So, this is ugly. We iteratively invoke waitid() with P_PID
          + WNOHANG for each PID we wait for, instead of using
          P_ALL. This is because we only want to get child
          information of very specific child processes, and not all
          of them. We might not have processed the SIGCHLD event of a
          previous invocation and we don't want to maintain an
          unbounded *per-child* event queue, hence we really don't
          want anything flushed out of the kernel's queue that we
          don't care about. Since this is O(n) this means that if you
          have a lot of processes you probably want to handle SIGCHLD
          yourself.

          We do not reap the children here (by using WNOWAIT), this
          is only done after the event source is dispatched so that
          the callback still sees the process as a zombie.
        */

        HASHMAP_FOREACH(s, e->child_sources, i) {
                assert(s->type == SOURCE_CHILD);

                if (s->pending)
                        continue;

                if (s->enabled == SD_EVENT_OFF)
                        continue;

                zero(s->child.siginfo);
                r = waitid(P_PID, s->child.pid, &s->child.siginfo,
                           WNOHANG | (s->child.options & WEXITED ? WNOWAIT : 0) | s->child.options);
                if (r < 0)
                        return -errno;

                if (s->child.siginfo.si_pid != 0) {
                        bool zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                        if (!zombie && (s->child.options & WEXITED)) {
                                /* If the child isn't dead then let's
                                 * immediately remove the state change
                                 * from the queue, since there's no
                                 * benefit in leaving it queued */

                                assert(s->child.options & (WSTOPPED|WCONTINUED));
                                waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|(s->child.options & (WSTOPPED|WCONTINUED)));
                        }

                        r = source_set_pending(s, true);
                        if (r < 0)
                                return r;
                }
        }

        return 0;
}

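/* Illustrative sketch, not part of the original file: watching a child process with the
 * public API that feeds the machinery above. sd_event_add_child() requires SIGCHLD to be
 * blocked before the child is forked. The callback matches the sd_event_child_handler_t
 * signature; the loop reaps the zombie right after the callback ran (see source_dispatch()
 * below). */
static int example_child_exited(sd_event_source *source, const siginfo_t *si, void *userdata) {
        /* si->si_status is the exit status for CLD_EXITED, the signal number otherwise */
        return sd_event_exit(sd_event_source_get_event(source),
                             si->si_code == CLD_EXITED ? si->si_status : EXIT_FAILURE);
}

static int example_watch_child(sd_event *event, pid_t child) {
        /* WEXITED: we want to be notified about termination */
        return sd_event_add_child(event, NULL, child, WEXITED, example_child_exited, NULL);
}
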
static int process_signal(sd_event *e, struct signal_data *d, uint32_t events) {
        bool read_one = false;
        int r;

        assert(e);
        assert(d);
        assert_return(events == EPOLLIN, -EIO);

        /* If there's a signal queued on this priority and SIGCHLD is
           on this priority too, then make sure to recheck the
           children we watch. This is because we only ever dequeue
           the first signal per priority, and if we dequeue one and
           SIGCHLD is enqueued behind it we wouldn't know; but we
           might have higher-priority children we care about, hence we
           need to check for them explicitly. */

        if (sigismember(&d->sigset, SIGCHLD))
                e->need_process_child = true;

        /* If there's already an event source pending for this
         * priority we don't read another */
        if (d->current)
                return 0;

        for (;;) {
                struct signalfd_siginfo si;
                ssize_t n;
                sd_event_source *s = NULL;

                n = read(d->fd, &si, sizeof(si));
                if (n < 0) {
                        if (IN_SET(errno, EAGAIN, EINTR))
                                return read_one;

                        return -errno;
                }

                if (_unlikely_(n != sizeof(si)))
                        return -EIO;

                assert(SIGNAL_VALID(si.ssi_signo));

                read_one = true;

                if (e->signal_sources)
                        s = e->signal_sources[si.ssi_signo];
                if (!s)
                        continue;
                if (s->pending)
                        continue;

                s->signal.siginfo = si;
                d->current = s;

                r = source_set_pending(s, true);
                if (r < 0)
                        return r;

                return 1;
        }
}

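/* Illustrative sketch, not part of the original file: the public-API side of the signalfd
 * handling above. The signal must be blocked before sd_event_add_signal() is called,
 * otherwise the call fails with -EBUSY. The callback matches the sd_event_signal_handler_t
 * signature. */
static int example_on_sigterm(sd_event_source *source, const struct signalfd_siginfo *si, void *userdata) {
        return sd_event_exit(sd_event_source_get_event(source), 0);
}

static int example_hook_sigterm(sd_event *event) {
        sigset_t mask;

        sigemptyset(&mask);
        sigaddset(&mask, SIGTERM);
        if (sigprocmask(SIG_BLOCK, &mask, NULL) < 0)
                return -errno;

        return sd_event_add_signal(event, NULL, SIGTERM, example_on_sigterm, NULL);
}
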
static int event_inotify_data_read(sd_event *e, struct inotify_data *d, uint32_t revents) {
        ssize_t n;

        assert(e);
        assert(d);

        assert_return(revents == EPOLLIN, -EIO);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        /* Is the read buffer non-empty? If so, let's not read more */
        if (d->buffer_filled > 0)
                return 0;

        n = read(d->fd, &d->buffer, sizeof(d->buffer));
        if (n < 0) {
                if (IN_SET(errno, EAGAIN, EINTR))
                        return 0;

                return -errno;
        }

        assert(n > 0);
        d->buffer_filled = (size_t) n;
        LIST_PREPEND(buffered, e->inotify_data_buffered, d);

        return 1;
}

static void event_inotify_data_drop(sd_event *e, struct inotify_data *d, size_t sz) {
        assert(e);
        assert(d);
        assert(sz <= d->buffer_filled);

        if (sz == 0)
                return;

        /* Move the rest of the buffer to the front, in order to get things properly aligned again */
        memmove(d->buffer.raw, d->buffer.raw + sz, d->buffer_filled - sz);
        d->buffer_filled -= sz;

        if (d->buffer_filled == 0)
                LIST_REMOVE(buffered, e->inotify_data_buffered, d);
}

static int event_inotify_data_process(sd_event *e, struct inotify_data *d) {
        int r;

        assert(e);
        assert(d);

        /* If there's already an event source pending for this priority, don't read another */
        if (d->n_pending > 0)
                return 0;

        while (d->buffer_filled > 0) {
                size_t sz;

                /* Let's validate that the event structures are complete */
                if (d->buffer_filled < offsetof(struct inotify_event, name))
                        return -EIO;

                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                if (d->buffer_filled < sz)
                        return -EIO;

                if (d->buffer.ev.mask & IN_Q_OVERFLOW) {
                        struct inode_data *inode_data;
                        Iterator i;

                        /* The queue overran, let's pass this event to all event sources connected to this inotify
                         * object */

                        HASHMAP_FOREACH(inode_data, d->inodes, i) {
                                sd_event_source *s;

                                LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                        if (s->enabled == SD_EVENT_OFF)
                                                continue;

                                        r = source_set_pending(s, true);
                                        if (r < 0)
                                                return r;
                                }
                        }
                } else {
                        struct inode_data *inode_data;
                        sd_event_source *s;

                        /* Find the inode object for this watch descriptor. If IN_IGNORED is set we also remove it from
                         * our watch descriptor table. */
                        if (d->buffer.ev.mask & IN_IGNORED) {

                                inode_data = hashmap_remove(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }

                                /* The watch descriptor was removed by the kernel, let's drop it here too */
                                inode_data->wd = -1;
                        } else {
                                inode_data = hashmap_get(d->wd, INT_TO_PTR(d->buffer.ev.wd));
                                if (!inode_data) {
                                        event_inotify_data_drop(e, d, sz);
                                        continue;
                                }
                        }

                        /* Trigger all event sources that are interested in these events. Also trigger all event
                         * sources if IN_IGNORED or IN_UNMOUNT is set. */
                        LIST_FOREACH(inotify.by_inode_data, s, inode_data->event_sources) {

                                if (s->enabled == SD_EVENT_OFF)
                                        continue;

                                if ((d->buffer.ev.mask & (IN_IGNORED|IN_UNMOUNT)) == 0 &&
                                    (s->inotify.mask & d->buffer.ev.mask & IN_ALL_EVENTS) == 0)
                                        continue;

                                r = source_set_pending(s, true);
                                if (r < 0)
                                        return r;
                        }
                }

                /* Something pending now? If so, let's finish, otherwise let's read more. */
                if (d->n_pending > 0)
                        return 1;
        }

        return 0;
}

static int process_inotify(sd_event *e) {
        struct inotify_data *d;
        int r, done = 0;

        assert(e);

        LIST_FOREACH(buffered, d, e->inotify_data_buffered) {
                r = event_inotify_data_process(e, d);
                if (r < 0)
                        return r;
                if (r > 0)
                        done++;
        }

        return done;
}

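/* Illustrative sketch, not part of the original file: the public entry point feeding the
 * buffering logic above. Event sources watching the same inode share one inode object (and
 * watch descriptor), as the priority-change code earlier in this file relies on. The callback
 * matches the sd_event_inotify_handler_t signature. */
static int example_on_inotify(sd_event_source *source, const struct inotify_event *ev, void *userdata) {
        log_debug("inotify event 0x%x on %s",
                  (unsigned) ev->mask, ev->len > 0 ? ev->name : "(watched inode)");
        return 0;
}

static int example_watch_directory(sd_event *event, const char *path) {
        return sd_event_add_inotify(event, NULL, path,
                                    IN_CREATE|IN_DELETE|IN_MOVED_TO, example_on_inotify, NULL);
}
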
static int source_dispatch(sd_event_source *s) {
        EventSourceType saved_type;
        int r = 0;

        assert(s);
        assert(s->pending || s->type == SOURCE_EXIT);

        /* Save the event source type here, so that we still know it after the event callback which might invalidate
         * the event. */
        saved_type = s->type;

        if (!IN_SET(s->type, SOURCE_DEFER, SOURCE_EXIT)) {
                r = source_set_pending(s, false);
                if (r < 0)
                        return r;
        }

        if (s->type != SOURCE_POST) {
                sd_event_source *z;
                Iterator i;

                /* If we execute a non-post source, let's mark all
                 * post sources as pending */

                SET_FOREACH(z, s->event->post_sources, i) {
                        if (z->enabled == SD_EVENT_OFF)
                                continue;

                        r = source_set_pending(z, true);
                        if (r < 0)
                                return r;
                }
        }

        if (s->enabled == SD_EVENT_ONESHOT) {
                r = sd_event_source_set_enabled(s, SD_EVENT_OFF);
                if (r < 0)
                        return r;
        }

        s->dispatching = true;

        switch (s->type) {

        case SOURCE_IO:
                r = s->io.callback(s, s->io.fd, s->io.revents, s->userdata);
                break;

        case SOURCE_TIME_REALTIME:
        case SOURCE_TIME_BOOTTIME:
        case SOURCE_TIME_MONOTONIC:
        case SOURCE_TIME_REALTIME_ALARM:
        case SOURCE_TIME_BOOTTIME_ALARM:
                r = s->time.callback(s, s->time.next, s->userdata);
                break;

        case SOURCE_SIGNAL:
                r = s->signal.callback(s, &s->signal.siginfo, s->userdata);
                break;

        case SOURCE_CHILD: {
                bool zombie;

                zombie = IN_SET(s->child.siginfo.si_code, CLD_EXITED, CLD_KILLED, CLD_DUMPED);

                r = s->child.callback(s, &s->child.siginfo, s->userdata);

                /* Now, reap the PID for good. */
                if (zombie)
                        (void) waitid(P_PID, s->child.pid, &s->child.siginfo, WNOHANG|WEXITED);

                break;
        }

        case SOURCE_DEFER:
                r = s->defer.callback(s, s->userdata);
                break;

        case SOURCE_POST:
                r = s->post.callback(s, s->userdata);
                break;

        case SOURCE_EXIT:
                r = s->exit.callback(s, s->userdata);
                break;

        case SOURCE_INOTIFY: {
                struct sd_event *e = s->event;
                struct inotify_data *d;
                size_t sz;

                assert(s->inotify.inode_data);
                assert_se(d = s->inotify.inode_data->inotify_data);

                assert(d->buffer_filled >= offsetof(struct inotify_event, name));
                sz = offsetof(struct inotify_event, name) + d->buffer.ev.len;
                assert(d->buffer_filled >= sz);

                r = s->inotify.callback(s, &d->buffer.ev, s->userdata);

                /* When no event is pending anymore on this inotify object, then let's drop the event from the
                 * buffer. */
                if (d->n_pending == 0)
                        event_inotify_data_drop(e, d, sz);

                break;
        }

        case SOURCE_WATCHDOG:
        case _SOURCE_EVENT_SOURCE_TYPE_MAX:
        case _SOURCE_EVENT_SOURCE_TYPE_INVALID:
                assert_not_reached("Wut? I shouldn't exist.");
        }

        s->dispatching = false;

        if (r < 0)
                log_debug_errno(r, "Event source %s (type %s) returned error, disabling: %m",
                                strna(s->description), event_source_type_to_string(saved_type));

        if (s->n_ref == 0)
                source_free(s);
        else if (r < 0)
                sd_event_source_set_enabled(s, SD_EVENT_OFF);

        return 1;
}

static int event_prepare(sd_event *e) {
        int r;

        assert(e);

        for (;;) {
                sd_event_source *s;

                s = prioq_peek(e->prepare);
                if (!s || s->prepare_iteration == e->iteration || s->enabled == SD_EVENT_OFF)
                        break;

                s->prepare_iteration = e->iteration;
                r = prioq_reshuffle(e->prepare, s, &s->prepare_index);
                if (r < 0)
                        return r;

                assert(s->prepare);

                s->dispatching = true;
                r = s->prepare(s, s->userdata);
                s->dispatching = false;

                if (r < 0)
                        log_debug_errno(r, "Prepare callback of event source %s (type %s) returned error, disabling: %m",
                                        strna(s->description), event_source_type_to_string(s->type));

                if (s->n_ref == 0)
                        source_free(s);
                else if (r < 0)
                        sd_event_source_set_enabled(s, SD_EVENT_OFF);
        }

        return 0;
}

static int dispatch_exit(sd_event *e) {
        sd_event_source *p;
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert(e);

        p = prioq_peek(e->exit);
        if (!p || p->enabled == SD_EVENT_OFF) {
                e->state = SD_EVENT_FINISHED;
                return 0;
        }

        ref = sd_event_ref(e);
        e->iteration++;
        e->state = SD_EVENT_EXITING;
        r = source_dispatch(p);
        e->state = SD_EVENT_INITIAL;
        return r;
}

static sd_event_source* event_next_pending(sd_event *e) {
        sd_event_source *p;

        assert(e);

        p = prioq_peek(e->pending);
        if (!p)
                return NULL;

        if (p->enabled == SD_EVENT_OFF)
                return NULL;

        return p;
}

static int arm_watchdog(sd_event *e) {
        struct itimerspec its = {};
        usec_t t;
        int r;

        assert(e);
        assert(e->watchdog_fd >= 0);

        t = sleep_between(e,
                          e->watchdog_last + (e->watchdog_period / 2),
                          e->watchdog_last + (e->watchdog_period * 3 / 4));

        timespec_store(&its.it_value, t);

        /* Make sure we never set the watchdog to 0, which tells the
         * kernel to disable it. */
        if (its.it_value.tv_sec == 0 && its.it_value.tv_nsec == 0)
                its.it_value.tv_nsec = 1;

        r = timerfd_settime(e->watchdog_fd, TFD_TIMER_ABSTIME, &its, NULL);
        if (r < 0)
                return -errno;

        return 0;
}

static int process_watchdog(sd_event *e) {
        assert(e);

        if (!e->watchdog)
                return 0;

        /* Don't notify watchdog too often */
        if (e->watchdog_last + e->watchdog_period / 4 > e->timestamp.monotonic)
                return 0;

        sd_notify(false, "WATCHDOG=1");
        e->watchdog_last = e->timestamp.monotonic;

        return arm_watchdog(e);
}

static void event_close_inode_data_fds(sd_event *e) {
        struct inode_data *d;

        assert(e);

        /* Close the fds pointing to the inodes to watch now. We need to close them as they might otherwise pin
         * filesystems. But we can't close them right away as we need them as long as the user still wants to make
         * adjustments to the event source, such as changing the priority (which requires us to remove and re-add a
         * watch for the inode). Hence, let's close them when entering the first iteration after they were added, as
         * a compromise. */

        while ((d = e->inode_data_to_close)) {
                assert(d->fd >= 0);
                d->fd = safe_close(d->fd);

                LIST_REMOVE(to_close, e->inode_data_to_close, d);
        }
}

_public_ int sd_event_prepare(sd_event *e) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->exit_requested)
                goto pending;

        e->iteration++;

        e->state = SD_EVENT_PREPARING;
        r = event_prepare(e);
        e->state = SD_EVENT_INITIAL;
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->monotonic);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->realtime_alarm);
        if (r < 0)
                return r;

        r = event_arm_timer(e, &e->boottime_alarm);
        if (r < 0)
                return r;

        event_close_inode_data_fds(e);

        if (event_next_pending(e) || e->need_process_child)
                goto pending;

        e->state = SD_EVENT_ARMED;

        return 0;

pending:
        e->state = SD_EVENT_ARMED;
        r = sd_event_wait(e, 0);
        if (r == 0)
                e->state = SD_EVENT_ARMED;

        return r;
}

_public_ int sd_event_wait(sd_event *e, uint64_t timeout) {
        struct epoll_event *ev_queue;
        unsigned ev_queue_max;
        int r, m, i;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_ARMED, -EBUSY);

        if (e->exit_requested) {
                e->state = SD_EVENT_PENDING;
                return 1;
        }

        ev_queue_max = MAX(e->n_sources, 1u);
        ev_queue = newa(struct epoll_event, ev_queue_max);

        /* If we still have inotify data buffered, then query the other fds, but don't wait on it */
        if (e->inotify_data_buffered)
                timeout = 0;

        m = epoll_wait(e->epoll_fd, ev_queue, ev_queue_max,
                       timeout == (uint64_t) -1 ? -1 : (int) DIV_ROUND_UP(timeout, USEC_PER_MSEC));
        if (m < 0) {
                if (errno == EINTR) {
                        e->state = SD_EVENT_PENDING;
                        return 1;
                }

                r = -errno;
                goto finish;
        }

        triple_timestamp_get(&e->timestamp);

        for (i = 0; i < m; i++) {

                if (ev_queue[i].data.ptr == INT_TO_PTR(SOURCE_WATCHDOG))
                        r = flush_timer(e, e->watchdog_fd, ev_queue[i].events, NULL);
                else {
                        WakeupType *t = ev_queue[i].data.ptr;

                        switch (*t) {

                        case WAKEUP_EVENT_SOURCE:
                                r = process_io(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_CLOCK_DATA: {
                                struct clock_data *d = ev_queue[i].data.ptr;
                                r = flush_timer(e, d->fd, ev_queue[i].events, &d->next);
                                break;
                        }

                        case WAKEUP_SIGNAL_DATA:
                                r = process_signal(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        case WAKEUP_INOTIFY_DATA:
                                r = event_inotify_data_read(e, ev_queue[i].data.ptr, ev_queue[i].events);
                                break;

                        default:
                                assert_not_reached("Invalid wake-up pointer");
                        }
                }
                if (r < 0)
                        goto finish;
        }

        r = process_watchdog(e);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.monotonic, &e->monotonic);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.realtime, &e->realtime_alarm);
        if (r < 0)
                goto finish;

        r = process_timer(e, e->timestamp.boottime, &e->boottime_alarm);
        if (r < 0)
                goto finish;

        if (e->need_process_child) {
                r = process_child(e);
                if (r < 0)
                        goto finish;
        }

        r = process_inotify(e);
        if (r < 0)
                goto finish;

        if (event_next_pending(e)) {
                e->state = SD_EVENT_PENDING;

                return 1;
        }

        r = 0;

finish:
        e->state = SD_EVENT_INITIAL;

        return r;
}

_public_ int sd_event_dispatch(sd_event *e) {
        sd_event_source *p;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_PENDING, -EBUSY);

        if (e->exit_requested)
                return dispatch_exit(e);

        p = event_next_pending(e);
        if (p) {
                _cleanup_(sd_event_unrefp) sd_event *ref = NULL;

                ref = sd_event_ref(e);
                e->state = SD_EVENT_RUNNING;
                r = source_dispatch(p);
                e->state = SD_EVENT_INITIAL;
                return r;
        }

        e->state = SD_EVENT_INITIAL;

        return 1;
}

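/* Illustrative sketch, not part of the original file: the prepare/wait/dispatch triple above
 * is exactly what sd_event_run() composes; open-coding it is useful when the loop is embedded
 * into a foreign poll loop via sd_event_get_fd(). */
static int example_run_one_iteration(sd_event *event) {
        int r;

        r = sd_event_prepare(event);                      /* > 0: something already pending */
        if (r == 0)
                r = sd_event_wait(event, (uint64_t) -1);  /* block until an event fires */
        if (r > 0)
                r = sd_event_dispatch(event);             /* run exactly one callback */

        return r;
}
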
static void event_log_delays(sd_event *e) {
        char b[ELEMENTSOF(e->delays) * DECIMAL_STR_MAX(unsigned) + 1];
        unsigned i;
        int o;

        for (i = o = 0; i < ELEMENTSOF(e->delays); i++) {
                o += snprintf(&b[o], sizeof(b) - o, "%u ", e->delays[i]);
                e->delays[i] = 0;
        }
        log_debug("Event loop iterations: %.*s", o, b);
}

_public_ int sd_event_run(sd_event *e, uint64_t timeout) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        if (e->profile_delays && e->last_run) {
                usec_t this_run;
                unsigned l;

                this_run = now(CLOCK_MONOTONIC);

                l = u64log2(this_run - e->last_run);
                assert(l < sizeof(e->delays));
                e->delays[l]++;

                if (this_run - e->last_log >= 5*USEC_PER_SEC) {
                        event_log_delays(e);
                        e->last_log = this_run;
                }
        }

        r = sd_event_prepare(e);
        if (r == 0)
                /* There was nothing? Then wait... */
                r = sd_event_wait(e, timeout);

        if (e->profile_delays)
                e->last_run = now(CLOCK_MONOTONIC);

        if (r > 0) {
                /* There's something now, so let's dispatch it */
                r = sd_event_dispatch(e);
                if (r < 0)
                        return r;

                return 1;
        }

        return r;
}

_public_ int sd_event_loop(sd_event *e) {
        _cleanup_(sd_event_unrefp) sd_event *ref = NULL;
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);
        assert_return(e->state == SD_EVENT_INITIAL, -EBUSY);

        ref = sd_event_ref(e);

        while (e->state != SD_EVENT_FINISHED) {
                r = sd_event_run(e, (uint64_t) -1);
                if (r < 0)
                        return r;
        }

        return e->exit_code;
}

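/* Illustrative sketch, not part of the original file: a minimal program around
 * sd_event_loop(). The loop runs until some callback calls sd_event_exit(), whose code
 * becomes the return value of sd_event_loop(). */
static int example_main_loop(void) {
        _cleanup_(sd_event_unrefp) sd_event *event = NULL;
        int r;

        r = sd_event_default(&event);
        if (r < 0)
                return r;

        /* ... add I/O, timer, signal, child, inotify, ... sources here ... */

        return sd_event_loop(event);
}
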
_public_ int sd_event_get_fd(sd_event *e) {

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->epoll_fd;
}

_public_ int sd_event_get_state(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->state;
}

_public_ int sd_event_get_exit_code(sd_event *e, int *code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(code, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!e->exit_requested)
                return -ENODATA;

        *code = e->exit_code;
        return 0;
}

_public_ int sd_event_exit(sd_event *e, int code) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(e->state != SD_EVENT_FINISHED, -ESTALE);
        assert_return(!event_pid_changed(e), -ECHILD);

        e->exit_requested = true;
        e->exit_code = code;

        return 0;
}

_public_ int sd_event_now(sd_event *e, clockid_t clock, uint64_t *usec) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(usec, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (!TRIPLE_TIMESTAMP_HAS_CLOCK(clock))
                return -EOPNOTSUPP;

        /* Generate a clean error in case CLOCK_BOOTTIME is not available. Note that we don't use clock_supported()
         * here, for a reason: there are systems where CLOCK_BOOTTIME is supported, but CLOCK_BOOTTIME_ALARM is not,
         * but for the purpose of getting the time this doesn't matter. */
        if (IN_SET(clock, CLOCK_BOOTTIME, CLOCK_BOOTTIME_ALARM) && !clock_boottime_supported())
                return -EOPNOTSUPP;

        if (!triple_timestamp_is_set(&e->timestamp)) {
                /* Implicitly fall back to now() if we never ran
                 * before and thus have no cached time. */
                *usec = now(clock);
                return 1;
        }

        *usec = triple_timestamp_by_clock(&e->timestamp, clock);
        return 0;
}

_public_ int sd_event_default(sd_event **ret) {
        sd_event *e = NULL;
        int r;

        if (!ret)
                return !!default_event;

        if (default_event) {
                *ret = sd_event_ref(default_event);
                return 0;
        }

        r = sd_event_new(&e);
        if (r < 0)
                return r;

        e->default_event_ptr = &default_event;
        e->tid = gettid();
        default_event = e;

        *ret = e;
        return 1;
}

_public_ int sd_event_get_tid(sd_event *e, pid_t *tid) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(tid, -EINVAL);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->tid != 0) {
                *tid = e->tid;
                return 0;
        }

        return -ENXIO;
}

_public_ int sd_event_set_watchdog(sd_event *e, int b) {
        int r;

        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        if (e->watchdog == !!b)
                return e->watchdog;

        if (b) {
                struct epoll_event ev;

                r = sd_watchdog_enabled(false, &e->watchdog_period);
                if (r <= 0)
                        return r;

                /* Issue first ping immediately */
                sd_notify(false, "WATCHDOG=1");
                e->watchdog_last = now(CLOCK_MONOTONIC);

                e->watchdog_fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK|TFD_CLOEXEC);
                if (e->watchdog_fd < 0)
                        return -errno;

                r = arm_watchdog(e);
                if (r < 0)
                        goto fail;

                ev = (struct epoll_event) {
                        .events = EPOLLIN,
                        .data.ptr = INT_TO_PTR(SOURCE_WATCHDOG),
                };

                r = epoll_ctl(e->epoll_fd, EPOLL_CTL_ADD, e->watchdog_fd, &ev);
                if (r < 0) {
                        r = -errno;
                        goto fail;
                }

        } else {
                if (e->watchdog_fd >= 0) {
                        epoll_ctl(e->epoll_fd, EPOLL_CTL_DEL, e->watchdog_fd, NULL);
                        e->watchdog_fd = safe_close(e->watchdog_fd);
                }
        }

        e->watchdog = !!b;
        return e->watchdog;

fail:
        e->watchdog_fd = safe_close(e->watchdog_fd);
        return r;
}

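/* Illustrative sketch, not part of the original file: under a unit with WatchdogSec= set, the
 * service manager passes WATCHDOG_USEC (and WATCHDOG_PID), sd_watchdog_enabled() picks those
 * up, and the call below makes the loop send "WATCHDOG=1" keep-alives automatically while it
 * runs. */
static int example_enable_watchdog(sd_event *event) {
        int r;

        r = sd_event_set_watchdog(event, true);
        if (r < 0)
                return r;
        if (r == 0)
                log_debug("Watchdog not requested by service manager, continuing without.");

        return 0;
}
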
_public_ int sd_event_get_watchdog(sd_event *e) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        return e->watchdog;
}

_public_ int sd_event_get_iteration(sd_event *e, uint64_t *ret) {
        assert_return(e, -EINVAL);
        assert_return(e = event_resolve(e), -ENOPKG);
        assert_return(!event_pid_changed(e), -ECHILD);

        *ret = e->iteration;
        return 0;
}

_public_ int sd_event_source_set_destroy_callback(sd_event_source *s, sd_event_destroy_t callback) {
        assert_return(s, -EINVAL);

        s->destroy_callback = callback;
        return 0;
}

_public_ int sd_event_source_get_destroy_callback(sd_event_source *s, sd_event_destroy_t *ret) {
        assert_return(s, -EINVAL);

        if (ret)
                *ret = s->destroy_callback;

        return !!s->destroy_callback;
}

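/* Illustrative sketch, not part of the original file: tying the lifetime of a heap-allocated
 * userdata object to the source via the setter above, so it is freed exactly once, whenever
 * the source is destroyed. */
static int example_add_owned_timer(sd_event *event, uint64_t usec,
                                   sd_event_time_handler_t handler, void *ctx /* malloc()ed */) {
        sd_event_source *source;
        int r;

        r = sd_event_add_time(event, &source, CLOCK_MONOTONIC, usec, 0, handler, ctx);
        if (r < 0)
                return r;

        /* free() matches sd_event_destroy_t's void (*)(void *userdata) signature */
        return sd_event_source_set_destroy_callback(source, free);
}
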
_public_ int sd_event_source_get_floating(sd_event_source *s) {
        assert_return(s, -EINVAL);

        return s->floating;
}

_public_ int sd_event_source_set_floating(sd_event_source *s, int b) {
        assert_return(s, -EINVAL);

        if (s->floating == !!b)
                return 0;

        if (!s->event) /* Already disconnected */
                return -ESTALE;

        s->floating = b;

        if (b) {
                sd_event_source_ref(s);
                sd_event_unref(s->event);
        } else {
                sd_event_ref(s->event);
                sd_event_source_unref(s);
        }

        return 1;
}