]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/barrier.c
tree-wide: drop license boilerplate
[thirdparty/systemd.git] / src / basic / barrier.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
279da1e3
DH
2/***
3 This file is part of systemd.
4
5 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
279da1e3
DH
6***/
7
8#include <errno.h>
9#include <fcntl.h>
279da1e3
DH
10#include <poll.h>
11#include <stdbool.h>
12#include <stdint.h>
279da1e3 13#include <stdlib.h>
279da1e3
DH
14#include <sys/eventfd.h>
15#include <sys/types.h>
16#include <unistd.h>
17
18#include "barrier.h"
3ffd4af2 19#include "fd-util.h"
279da1e3 20#include "macro.h"
279da1e3
DH
21
22/**
23 * Barriers
24 * This barrier implementation provides a simple synchronization method based
25 * on file-descriptors that can safely be used between threads and processes. A
26 * barrier object contains 2 shared counters based on eventfd. Both processes
27 * can now place barriers and wait for the other end to reach a random or
28 * specific barrier.
29 * Barriers are numbered, so you can either wait for the other end to reach any
30 * barrier or the last barrier that you placed. This way, you can use barriers
31 * for one-way *and* full synchronization. Note that even though barriers are
32 * numbered, these numbers are internal and recycled once both sides reached the
33 * same barrier (implemented as a simple signed counter). It is thus not
34 * possible to address barriers by their ID.
35 *
36 * Barrier-API: Both ends can place as many barriers via barrier_place() as
37 * they want and each pair of barriers on both sides will be implicitly linked.
38 * Each side can use the barrier_wait/sync_*() family of calls to wait for the
39 * other side to place a specific barrier. barrier_wait_next() waits until the
40 * other side calls barrier_place(). No links between the barriers are
41 * considered and this simply serves as most basic asynchronous barrier.
42 * barrier_sync_next() is like barrier_wait_next() and waits for the other side
43 * to place their next barrier via barrier_place(). However, it only waits for
44 * barriers that are linked to a barrier we already placed. If the other side
45 * already placed more barriers than we did, barrier_sync_next() returns
46 * immediately.
47 * barrier_sync() extends barrier_sync_next() and waits until the other end
48 * placed as many barriers via barrier_place() as we did. If they already placed
49 * as many as we did (or more), it returns immediately.
50 *
51 * Additionally to basic barriers, an abortion event is available.
52 * barrier_abort() places an abortion event that cannot be undone. An abortion
53 * immediately cancels all placed barriers and replaces them. Any running and
54 * following wait/sync call besides barrier_wait_abortion() will immediately
55 * return false on both sides (otherwise, they always return true).
56 * barrier_abort() can be called multiple times on both ends and will be a
57 * no-op if already called on this side.
58 * barrier_wait_abortion() can be used to wait for the other side to call
59 * barrier_abort() and is the only wait/sync call that does not return
60 * immediately if we aborted ourselves. It only returns once the other side
61 * called barrier_abort().
62 *
63 * Barriers can be used for in-process and inter-process synchronization.
64 * However, for in-process synchronization you could just use mutexes.
65 * Therefore, main target is IPC and we require both sides to *not* share the FD
66 * table. If that's given, barriers provide target tracking: If the remote side
67 * exit()s, an abortion event is implicitly queued on the other side. This way,
68 * a sync/wait call will be woken up if the remote side crashed or exited
69 * unexpectedly. However, note that these abortion events are only queued if the
70 * barrier-queue has been drained. Therefore, it is safe to place a barrier and
71 * exit. The other side can safely wait on the barrier even though the exit
72 * queued an abortion event. Usually, the abortion event would overwrite the
73 * barrier, however, that's not true for exit-abortion events. Those are only
74 * queued if the barrier-queue is drained (thus, the receiving side has placed
75 * more barriers than the remote side).
76 */
77
78/**
7566e267 79 * barrier_create() - Initialize a barrier object
279da1e3
DH
80 * @obj: barrier to initialize
81 *
82 * This initializes a barrier object. The caller is responsible of allocating
83 * the memory and keeping it valid. The memory does not have to be zeroed
84 * beforehand.
85 * Two eventfd objects are allocated for each barrier. If allocation fails, an
86 * error is returned.
87 *
88 * If this function fails, the barrier is reset to an invalid state so it is
89 * safe to call barrier_destroy() on the object regardless whether the
90 * initialization succeeded or not.
91 *
92 * The caller is responsible to destroy the object via barrier_destroy() before
93 * releasing the underlying memory.
94 *
95 * Returns: 0 on success, negative error code on failure.
96 */
7566e267 97int barrier_create(Barrier *b) {
fc808616
DH
98 _cleanup_(barrier_destroyp) Barrier *staging = b;
99 int r;
100
7566e267 101 assert(b);
279da1e3 102
fc808616
DH
103 b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
104 if (b->me < 0)
105 return -errno;
106
107 b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
108 if (b->them < 0)
109 return -errno;
110
111 r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
112 if (r < 0)
279da1e3
DH
113 return -errno;
114
fc808616 115 staging = NULL;
279da1e3
DH
116 return 0;
117}
118
119/**
120 * barrier_destroy() - Destroy a barrier object
121 * @b: barrier to destroy or NULL
122 *
7566e267
ZJS
123 * This destroys a barrier object that has previously been passed to
124 * barrier_create(). The object is released and reset to invalid
125 * state. Therefore, it is safe to call barrier_destroy() multiple
126 * times or even if barrier_create() failed. However, barrier must be
ff9b60f3 127 * always initialized with BARRIER_NULL.
279da1e3
DH
128 *
129 * If @b is NULL, this is a no-op.
130 */
131void barrier_destroy(Barrier *b) {
132 if (!b)
133 return;
134
279da1e3
DH
135 b->me = safe_close(b->me);
136 b->them = safe_close(b->them);
7566e267 137 safe_close_pair(b->pipe);
279da1e3
DH
138 b->barriers = 0;
139}
140
141/**
142 * barrier_set_role() - Set the local role of the barrier
143 * @b: barrier to operate on
144 * @role: role to set on the barrier
145 *
7566e267
ZJS
146 * This sets the roles on a barrier object. This is needed to know
147 * which side of the barrier you're on. Usually, the parent creates
148 * the barrier via barrier_create() and then calls fork() or clone().
149 * Therefore, the FDs are duplicated and the child retains the same
150 * barrier object.
279da1e3 151 *
7566e267
ZJS
152 * Both sides need to call barrier_set_role() after fork() or clone()
153 * are done. If this is not done, barriers will not work correctly.
279da1e3
DH
154 *
155 * Note that barriers could be supported without fork() or clone(). However,
156 * this is currently not needed so it hasn't been implemented.
157 */
158void barrier_set_role(Barrier *b, unsigned int role) {
159 int fd;
160
161 assert(b);
3742095b 162 assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
279da1e3 163 /* make sure this is only called once */
3f7f1fad 164 assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
279da1e3 165
7566e267 166 if (role == BARRIER_PARENT)
279da1e3 167 b->pipe[1] = safe_close(b->pipe[1]);
7566e267 168 else {
279da1e3
DH
169 b->pipe[0] = safe_close(b->pipe[0]);
170
171 /* swap me/them for children */
172 fd = b->me;
173 b->me = b->them;
174 b->them = fd;
175 }
176}
177
178/* places barrier; returns false if we aborted, otherwise true */
179static bool barrier_write(Barrier *b, uint64_t buf) {
180 ssize_t len;
181
182 /* prevent new sync-points if we already aborted */
183 if (barrier_i_aborted(b))
184 return false;
185
5c687f08 186 assert(b->me >= 0);
279da1e3
DH
187 do {
188 len = write(b->me, &buf, sizeof(buf));
7566e267 189 } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
279da1e3
DH
190
191 if (len != sizeof(buf))
192 goto error;
193
194 /* lock if we aborted */
195 if (buf >= (uint64_t)BARRIER_ABORTION) {
196 if (barrier_they_aborted(b))
197 b->barriers = BARRIER_WE_ABORTED;
198 else
199 b->barriers = BARRIER_I_ABORTED;
7566e267 200 } else if (!barrier_is_aborted(b))
279da1e3 201 b->barriers += buf;
279da1e3
DH
202
203 return !barrier_i_aborted(b);
204
205error:
206 /* If there is an unexpected error, we have to make this fatal. There
207 * is no way we can recover from sync-errors. Therefore, we close the
208 * pipe-ends and treat this as abortion. The other end will notice the
209 * pipe-close and treat it as abortion, too. */
210
7566e267 211 safe_close_pair(b->pipe);
279da1e3
DH
212 b->barriers = BARRIER_WE_ABORTED;
213 return false;
214}
215
216/* waits for barriers; returns false if they aborted, otherwise true */
217static bool barrier_read(Barrier *b, int64_t comp) {
279da1e3
DH
218 if (barrier_they_aborted(b))
219 return false;
220
221 while (b->barriers > comp) {
7566e267
ZJS
222 struct pollfd pfd[2] = {
223 { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
224 .events = POLLHUP },
225 { .fd = b->them,
226 .events = POLLIN }};
227 uint64_t buf;
228 int r;
279da1e3
DH
229
230 r = poll(pfd, 2, -1);
7566e267 231 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
279da1e3
DH
232 continue;
233 else if (r < 0)
234 goto error;
235
236 if (pfd[1].revents) {
7566e267
ZJS
237 ssize_t len;
238
239 /* events on @them signal new data for us */
279da1e3 240 len = read(b->them, &buf, sizeof(buf));
7566e267 241 if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
279da1e3
DH
242 continue;
243
244 if (len != sizeof(buf))
245 goto error;
7566e267 246 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
279da1e3
DH
247 /* POLLHUP on the pipe tells us the other side exited.
248 * We treat this as implicit abortion. But we only
249 * handle it if there's no event on the eventfd. This
250 * guarantees that exit-abortions do not overwrite real
251 * barriers. */
252 buf = BARRIER_ABORTION;
53290ee3
DH
253 else
254 continue;
279da1e3
DH
255
256 /* lock if they aborted */
257 if (buf >= (uint64_t)BARRIER_ABORTION) {
258 if (barrier_i_aborted(b))
259 b->barriers = BARRIER_WE_ABORTED;
260 else
261 b->barriers = BARRIER_THEY_ABORTED;
7566e267 262 } else if (!barrier_is_aborted(b))
279da1e3 263 b->barriers -= buf;
279da1e3
DH
264 }
265
266 return !barrier_they_aborted(b);
267
268error:
269 /* If there is an unexpected error, we have to make this fatal. There
270 * is no way we can recover from sync-errors. Therefore, we close the
271 * pipe-ends and treat this as abortion. The other end will notice the
272 * pipe-close and treat it as abortion, too. */
273
7566e267 274 safe_close_pair(b->pipe);
279da1e3
DH
275 b->barriers = BARRIER_WE_ABORTED;
276 return false;
277}
278
279/**
280 * barrier_place() - Place a new barrier
281 * @b: barrier object
282 *
283 * This places a new barrier on the barrier object. If either side already
284 * aborted, this is a no-op and returns "false". Otherwise, the barrier is
285 * placed and this returns "true".
286 *
287 * Returns: true if barrier was placed, false if either side aborted.
288 */
289bool barrier_place(Barrier *b) {
290 assert(b);
291
292 if (barrier_is_aborted(b))
293 return false;
294
295 barrier_write(b, BARRIER_SINGLE);
296 return true;
297}
298
299/**
300 * barrier_abort() - Abort the synchronization
301 * @b: barrier object to abort
302 *
303 * This aborts the barrier-synchronization. If barrier_abort() was already
304 * called on this side, this is a no-op. Otherwise, the barrier is put into the
305 * ABORT-state and will stay there. The other side is notified about the
306 * abortion. Any following attempt to place normal barriers or to wait on normal
307 * barriers will return immediately as "false".
308 *
309 * You can wait for the other side to call barrier_abort(), too. Use
310 * barrier_wait_abortion() for that.
311 *
312 * Returns: false if the other side already aborted, true otherwise.
313 */
314bool barrier_abort(Barrier *b) {
315 assert(b);
316
317 barrier_write(b, BARRIER_ABORTION);
318 return !barrier_they_aborted(b);
319}
320
321/**
322 * barrier_wait_next() - Wait for the next barrier of the other side
323 * @b: barrier to operate on
324 *
325 * This waits until the other side places its next barrier. This is independent
326 * of any barrier-links and just waits for any next barrier of the other side.
327 *
328 * If either side aborted, this returns false.
329 *
330 * Returns: false if either side aborted, true otherwise.
331 */
332bool barrier_wait_next(Barrier *b) {
333 assert(b);
334
335 if (barrier_is_aborted(b))
336 return false;
337
338 barrier_read(b, b->barriers - 1);
339 return !barrier_is_aborted(b);
340}
341
342/**
343 * barrier_wait_abortion() - Wait for the other side to abort
344 * @b: barrier to operate on
345 *
346 * This waits until the other side called barrier_abort(). This can be called
347 * regardless whether the local side already called barrier_abort() or not.
348 *
349 * If the other side has already aborted, this returns immediately.
350 *
351 * Returns: false if the local side aborted, true otherwise.
352 */
353bool barrier_wait_abortion(Barrier *b) {
354 assert(b);
355
356 barrier_read(b, BARRIER_THEY_ABORTED);
357 return !barrier_i_aborted(b);
358}
359
360/**
361 * barrier_sync_next() - Wait for the other side to place a next linked barrier
362 * @b: barrier to operate on
363 *
364 * This is like barrier_wait_next() and waits for the other side to call
365 * barrier_place(). However, this only waits for linked barriers. That means, if
366 * the other side already placed more barriers than (or as much as) we did, this
367 * returns immediately instead of waiting.
368 *
369 * If either side aborted, this returns false.
370 *
371 * Returns: false if either side aborted, true otherwise.
372 */
373bool barrier_sync_next(Barrier *b) {
374 assert(b);
375
376 if (barrier_is_aborted(b))
377 return false;
378
379 barrier_read(b, MAX((int64_t)0, b->barriers - 1));
380 return !barrier_is_aborted(b);
381}
382
383/**
384 * barrier_sync() - Wait for the other side to place as many barriers as we did
385 * @b: barrier to operate on
386 *
387 * This is like barrier_sync_next() but waits for the other side to call
388 * barrier_place() as often as we did (in total). If they already placed as much
389 * as we did (or more), this returns immediately instead of waiting.
390 *
391 * If either side aborted, this returns false.
392 *
393 * Returns: false if either side aborted, true otherwise.
394 */
395bool barrier_sync(Barrier *b) {
396 assert(b);
397
398 if (barrier_is_aborted(b))
399 return false;
400
401 barrier_read(b, 0);
402 return !barrier_is_aborted(b);
403}