]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/barrier.c
b89167d415d5e8d50fe09923b33cf666307a25cf
[thirdparty/systemd.git] / src / basic / barrier.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2 /***
3 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
4 ***/
5
6 #include <errno.h>
7 #include <fcntl.h>
8 #include <poll.h>
9 #include <stdbool.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <sys/eventfd.h>
13 #include <sys/types.h>
14 #include <unistd.h>
15
16 #include "barrier.h"
17 #include "fd-util.h"
18 #include "macro.h"
19
20 /**
21 * Barriers
22 * This barrier implementation provides a simple synchronization method based
23 * on file-descriptors that can safely be used between threads and processes. A
24 * barrier object contains 2 shared counters based on eventfd. Both processes
25 * can now place barriers and wait for the other end to reach a random or
26 * specific barrier.
27 * Barriers are numbered, so you can either wait for the other end to reach any
28 * barrier or the last barrier that you placed. This way, you can use barriers
29 * for one-way *and* full synchronization. Note that even-though barriers are
30 * numbered, these numbers are internal and recycled once both sides reached the
31 * same barrier (implemented as a simple signed counter). It is thus not
32 * possible to address barriers by their ID.
33 *
34 * Barrier-API: Both ends can place as many barriers via barrier_place() as
35 * they want and each pair of barriers on both sides will be implicitly linked.
36 * Each side can use the barrier_wait/sync_*() family of calls to wait for the
37 * other side to place a specific barrier. barrier_wait_next() waits until the
38 * other side calls barrier_place(). No links between the barriers are
39 * considered and this simply serves as most basic asynchronous barrier.
40 * barrier_sync_next() is like barrier_wait_next() and waits for the other side
41 * to place their next barrier via barrier_place(). However, it only waits for
42 * barriers that are linked to a barrier we already placed. If the other side
43 * already placed more barriers than we did, barrier_sync_next() returns
44 * immediately.
45 * barrier_sync() extends barrier_sync_next() and waits until the other end
46 * placed as many barriers via barrier_place() as we did. If they already placed
47 * as many as we did (or more), it returns immediately.
48 *
49 * Additionally to basic barriers, an abortion event is available.
50 * barrier_abort() places an abortion event that cannot be undone. An abortion
51 * immediately cancels all placed barriers and replaces them. Any running and
52 * following wait/sync call besides barrier_wait_abortion() will immediately
53 * return false on both sides (otherwise, they always return true).
54 * barrier_abort() can be called multiple times on both ends and will be a
55 * no-op if already called on this side.
56 * barrier_wait_abortion() can be used to wait for the other side to call
57 * barrier_abort() and is the only wait/sync call that does not return
58 * immediately if we aborted ourselves. It only returns once the other side
59 * called barrier_abort().
60 *
61 * Barriers can be used for in-process and inter-process synchronization.
62 * However, for in-process synchronization you could just use mutexes.
63 * Therefore, main target is IPC and we require both sides to *not* share the FD
64 * table. If that's given, barriers provide target tracking: If the remote side
65 * exit()s, an abortion event is implicitly queued on the other side. This way,
66 * a sync/wait call will be woken up if the remote side crashed or exited
67 * unexpectedly. However, note that these abortion events are only queued if the
68 * barrier-queue has been drained. Therefore, it is safe to place a barrier and
69 * exit. The other side can safely wait on the barrier even though the exit
70 * queued an abortion event. Usually, the abortion event would overwrite the
71 * barrier, however, that's not true for exit-abortion events. Those are only
72 * queued if the barrier-queue is drained (thus, the receiving side has placed
73 * more barriers than the remote side).
74 */
75
76 /**
77 * barrier_create() - Initialize a barrier object
78 * @b: barrier to initialize
79 *
80 * This initializes a barrier object. The caller is responsible for allocating
81 * the memory and keeping it valid. The memory does not have to be zeroed
82 * beforehand.
83 * Two eventfd objects are allocated for each barrier. If allocation fails, an
84 * error is returned.
85 *
86 * If this function fails, the barrier is reset to an invalid state so it is
87 * safe to call barrier_destroy() on the object regardless whether the
88 * initialization succeeded or not.
89 *
90 * The caller is responsible to destroy the object via barrier_destroy() before
91 * releasing the underlying memory.
92 *
93 * Returns: 0 on success, negative error code on failure.
94 */
95 int barrier_create(Barrier *b) {
96 _cleanup_(barrier_destroyp) Barrier *staging = b;
97 int r;
98
99 assert(b);
100
101 b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
102 if (b->me < 0)
103 return -errno;
104
105 b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
106 if (b->them < 0)
107 return -errno;
108
109 r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
110 if (r < 0)
111 return -errno;
112
113 staging = NULL;
114 return 0;
115 }
116
117 /**
118 * barrier_destroy() - Destroy a barrier object
119 * @b: barrier to destroy or NULL
120 *
121 * This destroys a barrier object that has previously been passed to
122 * barrier_create(). The object is released and reset to invalid
123 * state. Therefore, it is safe to call barrier_destroy() multiple
124 * times or even if barrier_create() failed. However, barrier must be
125 * always initialized with BARRIER_NULL.
126 *
127 * If @b is NULL, this is a no-op.
128 */
129 void barrier_destroy(Barrier *b) {
130 if (!b)
131 return;
132
133 b->me = safe_close(b->me);
134 b->them = safe_close(b->them);
135 safe_close_pair(b->pipe);
136 b->barriers = 0;
137 }
138
139 /**
140 * barrier_set_role() - Set the local role of the barrier
141 * @b: barrier to operate on
142 * @role: role to set on the barrier
143 *
144 * This sets the roles on a barrier object. This is needed to know
145 * which side of the barrier you're on. Usually, the parent creates
146 * the barrier via barrier_create() and then calls fork() or clone().
147 * Therefore, the FDs are duplicated and the child retains the same
148 * barrier object.
149 *
150 * Both sides need to call barrier_set_role() after fork() or clone()
151 * are done. If this is not done, barriers will not work correctly.
152 *
153 * Note that barriers could be supported without fork() or clone(). However,
154 * this is currently not needed so it hasn't been implemented.
155 */
156 void barrier_set_role(Barrier *b, unsigned int role) {
157 int fd;
158
159 assert(b);
160 assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
161 /* make sure this is only called once */
162 assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
163
164 if (role == BARRIER_PARENT)
165 b->pipe[1] = safe_close(b->pipe[1]);
166 else {
167 b->pipe[0] = safe_close(b->pipe[0]);
168
169 /* swap me/them for children */
170 fd = b->me;
171 b->me = b->them;
172 b->them = fd;
173 }
174 }
175
176 /* places barrier; returns false if we aborted, otherwise true */
177 static bool barrier_write(Barrier *b, uint64_t buf) {
178 ssize_t len;
179
180 /* prevent new sync-points if we already aborted */
181 if (barrier_i_aborted(b))
182 return false;
183
184 assert(b->me >= 0);
185 do {
186 len = write(b->me, &buf, sizeof(buf));
187 } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
188
189 if (len != sizeof(buf))
190 goto error;
191
192 /* lock if we aborted */
193 if (buf >= (uint64_t)BARRIER_ABORTION) {
194 if (barrier_they_aborted(b))
195 b->barriers = BARRIER_WE_ABORTED;
196 else
197 b->barriers = BARRIER_I_ABORTED;
198 } else if (!barrier_is_aborted(b))
199 b->barriers += buf;
200
201 return !barrier_i_aborted(b);
202
203 error:
204 /* If there is an unexpected error, we have to make this fatal. There
205 * is no way we can recover from sync-errors. Therefore, we close the
206 * pipe-ends and treat this as abortion. The other end will notice the
207 * pipe-close and treat it as abortion, too. */
208
209 safe_close_pair(b->pipe);
210 b->barriers = BARRIER_WE_ABORTED;
211 return false;
212 }
213
214 /* waits for barriers; returns false if they aborted, otherwise true */
215 static bool barrier_read(Barrier *b, int64_t comp) {
216 if (barrier_they_aborted(b))
217 return false;
218
219 while (b->barriers > comp) {
220 struct pollfd pfd[2] = {
221 { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
222 .events = POLLHUP },
223 { .fd = b->them,
224 .events = POLLIN }};
225 uint64_t buf;
226 int r;
227
228 r = poll(pfd, 2, -1);
229 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
230 continue;
231 else if (r < 0)
232 goto error;
233
234 if (pfd[1].revents) {
235 ssize_t len;
236
237 /* events on @them signal new data for us */
238 len = read(b->them, &buf, sizeof(buf));
239 if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
240 continue;
241
242 if (len != sizeof(buf))
243 goto error;
244 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
245 /* POLLHUP on the pipe tells us the other side exited.
246 * We treat this as implicit abortion. But we only
247 * handle it if there's no event on the eventfd. This
248 * guarantees that exit-abortions do not overwrite real
249 * barriers. */
250 buf = BARRIER_ABORTION;
251 else
252 continue;
253
254 /* lock if they aborted */
255 if (buf >= (uint64_t)BARRIER_ABORTION) {
256 if (barrier_i_aborted(b))
257 b->barriers = BARRIER_WE_ABORTED;
258 else
259 b->barriers = BARRIER_THEY_ABORTED;
260 } else if (!barrier_is_aborted(b))
261 b->barriers -= buf;
262 }
263
264 return !barrier_they_aborted(b);
265
266 error:
267 /* If there is an unexpected error, we have to make this fatal. There
268 * is no way we can recover from sync-errors. Therefore, we close the
269 * pipe-ends and treat this as abortion. The other end will notice the
270 * pipe-close and treat it as abortion, too. */
271
272 safe_close_pair(b->pipe);
273 b->barriers = BARRIER_WE_ABORTED;
274 return false;
275 }
276
277 /**
278 * barrier_place() - Place a new barrier
279 * @b: barrier object
280 *
281 * This places a new barrier on the barrier object. If either side already
282 * aborted, this is a no-op and returns "false". Otherwise, the barrier is
283 * placed and this returns "true".
284 *
285 * Returns: true if barrier was placed, false if either side aborted.
286 */
287 bool barrier_place(Barrier *b) {
288 assert(b);
289
290 if (barrier_is_aborted(b))
291 return false;
292
293 barrier_write(b, BARRIER_SINGLE);
294 return true;
295 }
296
297 /**
298 * barrier_abort() - Abort the synchronization
299 * @b: barrier object to abort
300 *
301 * This aborts the barrier-synchronization. If barrier_abort() was already
302 * called on this side, this is a no-op. Otherwise, the barrier is put into the
303 * ABORT-state and will stay there. The other side is notified about the
304 * abortion. Any following attempt to place normal barriers or to wait on normal
305 * barriers will return immediately as "false".
306 *
307 * You can wait for the other side to call barrier_abort(), too. Use
308 * barrier_wait_abortion() for that.
309 *
310 * Returns: false if the other side already aborted, true otherwise.
311 */
312 bool barrier_abort(Barrier *b) {
313 assert(b);
314
315 barrier_write(b, BARRIER_ABORTION);
316 return !barrier_they_aborted(b);
317 }
318
319 /**
320 * barrier_wait_next() - Wait for the next barrier of the other side
321 * @b: barrier to operate on
322 *
323 * This waits until the other side places its next barrier. This is independent
324 * of any barrier-links and just waits for any next barrier of the other side.
325 *
326 * If either side aborted, this returns false.
327 *
328 * Returns: false if either side aborted, true otherwise.
329 */
330 bool barrier_wait_next(Barrier *b) {
331 assert(b);
332
333 if (barrier_is_aborted(b))
334 return false;
335
336 barrier_read(b, b->barriers - 1);
337 return !barrier_is_aborted(b);
338 }
339
340 /**
341 * barrier_wait_abortion() - Wait for the other side to abort
342 * @b: barrier to operate on
343 *
344 * This waits until the other side called barrier_abort(). This can be called
345 * regardless whether the local side already called barrier_abort() or not.
346 *
347 * If the other side has already aborted, this returns immediately.
348 *
349 * Returns: false if the local side aborted, true otherwise.
350 */
351 bool barrier_wait_abortion(Barrier *b) {
352 assert(b);
353
354 barrier_read(b, BARRIER_THEY_ABORTED);
355 return !barrier_i_aborted(b);
356 }
357
358 /**
359 * barrier_sync_next() - Wait for the other side to place a next linked barrier
360 * @b: barrier to operate on
361 *
362 * This is like barrier_wait_next() and waits for the other side to call
363 * barrier_place(). However, this only waits for linked barriers. That means, if
364 * the other side already placed more barriers than (or as much as) we did, this
365 * returns immediately instead of waiting.
366 *
367 * If either side aborted, this returns false.
368 *
369 * Returns: false if either side aborted, true otherwise.
370 */
371 bool barrier_sync_next(Barrier *b) {
372 assert(b);
373
374 if (barrier_is_aborted(b))
375 return false;
376
377 barrier_read(b, MAX((int64_t)0, b->barriers - 1));
378 return !barrier_is_aborted(b);
379 }
380
381 /**
382 * barrier_sync() - Wait for the other side to place as many barriers as we did
383 * @b: barrier to operate on
384 *
385 * This is like barrier_sync_next() but waits for the other side to call
386 * barrier_place() as often as we did (in total). If they already placed as much
387 * as we did (or more), this returns immediately instead of waiting.
388 *
389 * If either side aborted, this returns false.
390 *
391 * Returns: false if either side aborted, true otherwise.
392 */
393 bool barrier_sync(Barrier *b) {
394 assert(b);
395
396 if (barrier_is_aborted(b))
397 return false;
398
399 barrier_read(b, 0);
400 return !barrier_is_aborted(b);
401 }