]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/barrier.c
Add SPDX license identifiers to source files under the LGPL
[thirdparty/systemd.git] / src / basic / barrier.c
CommitLineData
53e1b683 1/* SPDX-License-Identifier: LGPL-2.1+ */
279da1e3
DH
2/***
3 This file is part of systemd.
4
5 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
6
7 systemd is free software; you can redistribute it and/or modify it
8 under the terms of the GNU Lesser General Public License as published by
9 the Free Software Foundation; either version 2.1 of the License, or
10 (at your option) any later version.
11
12 systemd is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 Lesser General Public License for more details.
16
17 You should have received a copy of the GNU Lesser General Public License
18 along with systemd; If not, see <http://www.gnu.org/licenses/>.
19***/
20
21#include <errno.h>
22#include <fcntl.h>
279da1e3
DH
23#include <poll.h>
24#include <stdbool.h>
25#include <stdint.h>
279da1e3 26#include <stdlib.h>
279da1e3
DH
27#include <sys/eventfd.h>
28#include <sys/types.h>
29#include <unistd.h>
30
31#include "barrier.h"
3ffd4af2 32#include "fd-util.h"
279da1e3 33#include "macro.h"
279da1e3
DH
34
35/**
36 * Barriers
37 * This barrier implementation provides a simple synchronization method based
38 * on file-descriptors that can safely be used between threads and processes. A
39 * barrier object contains 2 shared counters based on eventfd. Both processes
40 * can now place barriers and wait for the other end to reach a random or
41 * specific barrier.
42 * Barriers are numbered, so you can either wait for the other end to reach any
43 * barrier or the last barrier that you placed. This way, you can use barriers
44 * for one-way *and* full synchronization. Note that even though barriers are
45 * numbered, these numbers are internal and recycled once both sides reached the
46 * same barrier (implemented as a simple signed counter). It is thus not
47 * possible to address barriers by their ID.
48 *
49 * Barrier-API: Both ends can place as many barriers via barrier_place() as
50 * they want and each pair of barriers on both sides will be implicitly linked.
51 * Each side can use the barrier_wait/sync_*() family of calls to wait for the
52 * other side to place a specific barrier. barrier_wait_next() waits until the
53 * other side calls barrier_place(). No links between the barriers are
54 * considered and this simply serves as most basic asynchronous barrier.
55 * barrier_sync_next() is like barrier_wait_next() and waits for the other side
56 * to place their next barrier via barrier_place(). However, it only waits for
57 * barriers that are linked to a barrier we already placed. If the other side
58 * already placed more barriers than we did, barrier_sync_next() returns
59 * immediately.
60 * barrier_sync() extends barrier_sync_next() and waits until the other end
61 * placed as many barriers via barrier_place() as we did. If they already placed
62 * as many as we did (or more), it returns immediately.
63 *
64 * Additionally to basic barriers, an abortion event is available.
65 * barrier_abort() places an abortion event that cannot be undone. An abortion
66 * immediately cancels all placed barriers and replaces them. Any running and
67 * following wait/sync call besides barrier_wait_abortion() will immediately
68 * return false on both sides (otherwise, they always return true).
69 * barrier_abort() can be called multiple times on both ends and will be a
70 * no-op if already called on this side.
71 * barrier_wait_abortion() can be used to wait for the other side to call
72 * barrier_abort() and is the only wait/sync call that does not return
73 * immediately if we aborted ourselves. It only returns once the other side
74 * called barrier_abort().
75 *
76 * Barriers can be used for in-process and inter-process synchronization.
77 * However, for in-process synchronization you could just use mutexes.
78 * Therefore, main target is IPC and we require both sides to *not* share the FD
79 * table. If that's given, barriers provide target tracking: If the remote side
80 * exit()s, an abortion event is implicitly queued on the other side. This way,
81 * a sync/wait call will be woken up if the remote side crashed or exited
82 * unexpectedly. However, note that these abortion events are only queued if the
83 * barrier-queue has been drained. Therefore, it is safe to place a barrier and
84 * exit. The other side can safely wait on the barrier even though the exit
85 * queued an abortion event. Usually, the abortion event would overwrite the
86 * barrier, however, that's not true for exit-abortion events. Those are only
87 * queued if the barrier-queue is drained (thus, the receiving side has placed
88 * more barriers than the remote side).
89 */
90
91/**
7566e267 92 * barrier_create() - Initialize a barrier object
279da1e3
DH
93 * @obj: barrier to initialize
94 *
95 * This initializes a barrier object. The caller is responsible of allocating
96 * the memory and keeping it valid. The memory does not have to be zeroed
97 * beforehand.
98 * Two eventfd objects are allocated for each barrier. If allocation fails, an
99 * error is returned.
100 *
101 * If this function fails, the barrier is reset to an invalid state so it is
102 * safe to call barrier_destroy() on the object regardless whether the
103 * initialization succeeded or not.
104 *
105 * The caller is responsible to destroy the object via barrier_destroy() before
106 * releasing the underlying memory.
107 *
108 * Returns: 0 on success, negative error code on failure.
109 */
7566e267 110int barrier_create(Barrier *b) {
fc808616
DH
111 _cleanup_(barrier_destroyp) Barrier *staging = b;
112 int r;
113
7566e267 114 assert(b);
279da1e3 115
fc808616
DH
116 b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
117 if (b->me < 0)
118 return -errno;
119
120 b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
121 if (b->them < 0)
122 return -errno;
123
124 r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
125 if (r < 0)
279da1e3
DH
126 return -errno;
127
fc808616 128 staging = NULL;
279da1e3
DH
129 return 0;
130}
131
132/**
133 * barrier_destroy() - Destroy a barrier object
134 * @b: barrier to destroy or NULL
135 *
7566e267
ZJS
136 * This destroys a barrier object that has previously been passed to
137 * barrier_create(). The object is released and reset to invalid
138 * state. Therefore, it is safe to call barrier_destroy() multiple
139 * times or even if barrier_create() failed. However, barrier must be
ff9b60f3 140 * always initialized with BARRIER_NULL.
279da1e3
DH
141 *
142 * If @b is NULL, this is a no-op.
143 */
144void barrier_destroy(Barrier *b) {
145 if (!b)
146 return;
147
279da1e3
DH
148 b->me = safe_close(b->me);
149 b->them = safe_close(b->them);
7566e267 150 safe_close_pair(b->pipe);
279da1e3
DH
151 b->barriers = 0;
152}
153
154/**
155 * barrier_set_role() - Set the local role of the barrier
156 * @b: barrier to operate on
157 * @role: role to set on the barrier
158 *
7566e267
ZJS
159 * This sets the roles on a barrier object. This is needed to know
160 * which side of the barrier you're on. Usually, the parent creates
161 * the barrier via barrier_create() and then calls fork() or clone().
162 * Therefore, the FDs are duplicated and the child retains the same
163 * barrier object.
279da1e3 164 *
7566e267
ZJS
165 * Both sides need to call barrier_set_role() after fork() or clone()
166 * are done. If this is not done, barriers will not work correctly.
279da1e3
DH
167 *
168 * Note that barriers could be supported without fork() or clone(). However,
169 * this is currently not needed so it hasn't been implemented.
170 */
171void barrier_set_role(Barrier *b, unsigned int role) {
172 int fd;
173
174 assert(b);
3742095b 175 assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
279da1e3 176 /* make sure this is only called once */
3f7f1fad 177 assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
279da1e3 178
7566e267 179 if (role == BARRIER_PARENT)
279da1e3 180 b->pipe[1] = safe_close(b->pipe[1]);
7566e267 181 else {
279da1e3
DH
182 b->pipe[0] = safe_close(b->pipe[0]);
183
184 /* swap me/them for children */
185 fd = b->me;
186 b->me = b->them;
187 b->them = fd;
188 }
189}
190
191/* places barrier; returns false if we aborted, otherwise true */
192static bool barrier_write(Barrier *b, uint64_t buf) {
193 ssize_t len;
194
195 /* prevent new sync-points if we already aborted */
196 if (barrier_i_aborted(b))
197 return false;
198
5c687f08 199 assert(b->me >= 0);
279da1e3
DH
200 do {
201 len = write(b->me, &buf, sizeof(buf));
7566e267 202 } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
279da1e3
DH
203
204 if (len != sizeof(buf))
205 goto error;
206
207 /* lock if we aborted */
208 if (buf >= (uint64_t)BARRIER_ABORTION) {
209 if (barrier_they_aborted(b))
210 b->barriers = BARRIER_WE_ABORTED;
211 else
212 b->barriers = BARRIER_I_ABORTED;
7566e267 213 } else if (!barrier_is_aborted(b))
279da1e3 214 b->barriers += buf;
279da1e3
DH
215
216 return !barrier_i_aborted(b);
217
218error:
219 /* If there is an unexpected error, we have to make this fatal. There
220 * is no way we can recover from sync-errors. Therefore, we close the
221 * pipe-ends and treat this as abortion. The other end will notice the
222 * pipe-close and treat it as abortion, too. */
223
7566e267 224 safe_close_pair(b->pipe);
279da1e3
DH
225 b->barriers = BARRIER_WE_ABORTED;
226 return false;
227}
228
229/* waits for barriers; returns false if they aborted, otherwise true */
230static bool barrier_read(Barrier *b, int64_t comp) {
279da1e3
DH
231 if (barrier_they_aborted(b))
232 return false;
233
234 while (b->barriers > comp) {
7566e267
ZJS
235 struct pollfd pfd[2] = {
236 { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
237 .events = POLLHUP },
238 { .fd = b->them,
239 .events = POLLIN }};
240 uint64_t buf;
241 int r;
279da1e3
DH
242
243 r = poll(pfd, 2, -1);
7566e267 244 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
279da1e3
DH
245 continue;
246 else if (r < 0)
247 goto error;
248
249 if (pfd[1].revents) {
7566e267
ZJS
250 ssize_t len;
251
252 /* events on @them signal new data for us */
279da1e3 253 len = read(b->them, &buf, sizeof(buf));
7566e267 254 if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
279da1e3
DH
255 continue;
256
257 if (len != sizeof(buf))
258 goto error;
7566e267 259 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
279da1e3
DH
260 /* POLLHUP on the pipe tells us the other side exited.
261 * We treat this as implicit abortion. But we only
262 * handle it if there's no event on the eventfd. This
263 * guarantees that exit-abortions do not overwrite real
264 * barriers. */
265 buf = BARRIER_ABORTION;
53290ee3
DH
266 else
267 continue;
279da1e3
DH
268
269 /* lock if they aborted */
270 if (buf >= (uint64_t)BARRIER_ABORTION) {
271 if (barrier_i_aborted(b))
272 b->barriers = BARRIER_WE_ABORTED;
273 else
274 b->barriers = BARRIER_THEY_ABORTED;
7566e267 275 } else if (!barrier_is_aborted(b))
279da1e3 276 b->barriers -= buf;
279da1e3
DH
277 }
278
279 return !barrier_they_aborted(b);
280
281error:
282 /* If there is an unexpected error, we have to make this fatal. There
283 * is no way we can recover from sync-errors. Therefore, we close the
284 * pipe-ends and treat this as abortion. The other end will notice the
285 * pipe-close and treat it as abortion, too. */
286
7566e267 287 safe_close_pair(b->pipe);
279da1e3
DH
288 b->barriers = BARRIER_WE_ABORTED;
289 return false;
290}
291
292/**
293 * barrier_place() - Place a new barrier
294 * @b: barrier object
295 *
296 * This places a new barrier on the barrier object. If either side already
297 * aborted, this is a no-op and returns "false". Otherwise, the barrier is
298 * placed and this returns "true".
299 *
300 * Returns: true if barrier was placed, false if either side aborted.
301 */
302bool barrier_place(Barrier *b) {
303 assert(b);
304
305 if (barrier_is_aborted(b))
306 return false;
307
308 barrier_write(b, BARRIER_SINGLE);
309 return true;
310}
311
312/**
313 * barrier_abort() - Abort the synchronization
314 * @b: barrier object to abort
315 *
316 * This aborts the barrier-synchronization. If barrier_abort() was already
317 * called on this side, this is a no-op. Otherwise, the barrier is put into the
318 * ABORT-state and will stay there. The other side is notified about the
319 * abortion. Any following attempt to place normal barriers or to wait on normal
320 * barriers will return immediately as "false".
321 *
322 * You can wait for the other side to call barrier_abort(), too. Use
323 * barrier_wait_abortion() for that.
324 *
325 * Returns: false if the other side already aborted, true otherwise.
326 */
327bool barrier_abort(Barrier *b) {
328 assert(b);
329
330 barrier_write(b, BARRIER_ABORTION);
331 return !barrier_they_aborted(b);
332}
333
334/**
335 * barrier_wait_next() - Wait for the next barrier of the other side
336 * @b: barrier to operate on
337 *
338 * This waits until the other side places its next barrier. This is independent
339 * of any barrier-links and just waits for any next barrier of the other side.
340 *
341 * If either side aborted, this returns false.
342 *
343 * Returns: false if either side aborted, true otherwise.
344 */
345bool barrier_wait_next(Barrier *b) {
346 assert(b);
347
348 if (barrier_is_aborted(b))
349 return false;
350
351 barrier_read(b, b->barriers - 1);
352 return !barrier_is_aborted(b);
353}
354
355/**
356 * barrier_wait_abortion() - Wait for the other side to abort
357 * @b: barrier to operate on
358 *
359 * This waits until the other side called barrier_abort(). This can be called
360 * regardless whether the local side already called barrier_abort() or not.
361 *
362 * If the other side has already aborted, this returns immediately.
363 *
364 * Returns: false if the local side aborted, true otherwise.
365 */
366bool barrier_wait_abortion(Barrier *b) {
367 assert(b);
368
369 barrier_read(b, BARRIER_THEY_ABORTED);
370 return !barrier_i_aborted(b);
371}
372
373/**
374 * barrier_sync_next() - Wait for the other side to place a next linked barrier
375 * @b: barrier to operate on
376 *
377 * This is like barrier_wait_next() and waits for the other side to call
378 * barrier_place(). However, this only waits for linked barriers. That means, if
379 * the other side already placed more barriers than (or as much as) we did, this
380 * returns immediately instead of waiting.
381 *
382 * If either side aborted, this returns false.
383 *
384 * Returns: false if either side aborted, true otherwise.
385 */
386bool barrier_sync_next(Barrier *b) {
387 assert(b);
388
389 if (barrier_is_aborted(b))
390 return false;
391
392 barrier_read(b, MAX((int64_t)0, b->barriers - 1));
393 return !barrier_is_aborted(b);
394}
395
396/**
397 * barrier_sync() - Wait for the other side to place as many barriers as we did
398 * @b: barrier to operate on
399 *
400 * This is like barrier_sync_next() but waits for the other side to call
401 * barrier_place() as often as we did (in total). If they already placed as much
402 * as we did (or more), this returns immediately instead of waiting.
403 *
404 * If either side aborted, this returns false.
405 *
406 * Returns: false if either side aborted, true otherwise.
407 */
408bool barrier_sync(Barrier *b) {
409 assert(b);
410
411 if (barrier_is_aborted(b))
412 return false;
413
414 barrier_read(b, 0);
415 return !barrier_is_aborted(b);
416}