]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/shared/barrier.c
Merge pull request #11827 from keszybz/pkgconfig-variables
[thirdparty/systemd.git] / src / shared / barrier.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <poll.h>
6 #include <stdbool.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <sys/eventfd.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12
13 #include "barrier.h"
14 #include "fd-util.h"
15 #include "macro.h"
16
17 /**
18 * Barriers
19 * This barrier implementation provides a simple synchronization method based
20 * on file-descriptors that can safely be used between threads and processes. A
21 * barrier object contains 2 shared counters based on eventfd. Both processes
22 * can now place barriers and wait for the other end to reach a random or
23 * specific barrier.
24 * Barriers are numbered, so you can either wait for the other end to reach any
25 * barrier or the last barrier that you placed. This way, you can use barriers
26 * for one-way *and* full synchronization. Note that even though barriers are
27 * numbered, these numbers are internal and recycled once both sides reached the
28 * same barrier (implemented as a simple signed counter). It is thus not
29 * possible to address barriers by their ID.
30 *
31 * Barrier-API: Both ends can place as many barriers via barrier_place() as
32 * they want and each pair of barriers on both sides will be implicitly linked.
33 * Each side can use the barrier_wait/sync_*() family of calls to wait for the
34 * other side to place a specific barrier. barrier_wait_next() waits until the
35 * other side calls barrier_place(). No links between the barriers are
36 * considered and this simply serves as most basic asynchronous barrier.
37 * barrier_sync_next() is like barrier_wait_next() and waits for the other side
38 * to place their next barrier via barrier_place(). However, it only waits for
39 * barriers that are linked to a barrier we already placed. If the other side
40 * already placed more barriers than we did, barrier_sync_next() returns
41 * immediately.
42 * barrier_sync() extends barrier_sync_next() and waits until the other end
43 * placed as many barriers via barrier_place() as we did. If they already placed
44 * as many as we did (or more), it returns immediately.
45 *
46 * In addition to basic barriers, an abortion event is available.
47 * barrier_abort() places an abortion event that cannot be undone. An abortion
48 * immediately cancels all placed barriers and replaces them. Any running and
49 * following wait/sync call besides barrier_wait_abortion() will immediately
50 * return false on both sides (otherwise, they always return true).
51 * barrier_abort() can be called multiple times on both ends and will be a
52 * no-op if already called on this side.
53 * barrier_wait_abortion() can be used to wait for the other side to call
54 * barrier_abort() and is the only wait/sync call that does not return
55 * immediately if we aborted ourselves. It only returns once the other side
56 * called barrier_abort().
57 *
58 * Barriers can be used for in-process and inter-process synchronization.
59 * However, for in-process synchronization you could just use mutexes.
60 * Therefore, main target is IPC and we require both sides to *not* share the FD
61 * table. If that's given, barriers provide target tracking: If the remote side
62 * exit()s, an abortion event is implicitly queued on the other side. This way,
63 * a sync/wait call will be woken up if the remote side crashed or exited
64 * unexpectedly. However, note that these abortion events are only queued if the
65 * barrier-queue has been drained. Therefore, it is safe to place a barrier and
66 * exit. The other side can safely wait on the barrier even though the exit
67 * queued an abortion event. Usually, the abortion event would overwrite the
68 * barrier, however, that's not true for exit-abortion events. Those are only
69 * queued if the barrier-queue is drained (thus, the receiving side has placed
70 * more barriers than the remote side).
71 */
72
73 /**
74 * barrier_create() - Initialize a barrier object
75 * @obj: barrier to initialize
76 *
77 * This initializes a barrier object. The caller is responsible for allocating
78 * the memory and keeping it valid. The memory does not have to be zeroed
79 * beforehand.
80 * Two eventfd objects are allocated for each barrier. If allocation fails, an
81 * error is returned.
82 *
83 * If this function fails, the barrier is reset to an invalid state so it is
84 * safe to call barrier_destroy() on the object regardless whether the
85 * initialization succeeded or not.
86 *
87 * The caller is responsible for destroying the object via barrier_destroy() before
88 * releasing the underlying memory.
89 *
90 * Returns: 0 on success, negative error code on failure.
91 */
92 int barrier_create(Barrier *b) {
93 _cleanup_(barrier_destroyp) Barrier *staging = b;
94 int r;
95
96 assert(b);
97
98 b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
99 if (b->me < 0)
100 return -errno;
101
102 b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
103 if (b->them < 0)
104 return -errno;
105
106 r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
107 if (r < 0)
108 return -errno;
109
110 staging = NULL;
111 return 0;
112 }
113
114 /**
115 * barrier_destroy() - Destroy a barrier object
116 * @b: barrier to destroy or NULL
117 *
118 * This destroys a barrier object that has previously been passed to
119 * barrier_create(). The object is released and reset to invalid
120 * state. Therefore, it is safe to call barrier_destroy() multiple
121 * times or even if barrier_create() failed. However, the barrier must
122 * always be initialized with BARRIER_NULL.
123 *
124 * If @b is NULL, this is a no-op.
125 */
126 void barrier_destroy(Barrier *b) {
127 if (!b)
128 return;
129
130 b->me = safe_close(b->me);
131 b->them = safe_close(b->them);
132 safe_close_pair(b->pipe);
133 b->barriers = 0;
134 }
135
136 /**
137 * barrier_set_role() - Set the local role of the barrier
138 * @b: barrier to operate on
139 * @role: role to set on the barrier
140 *
141 * This sets the roles on a barrier object. This is needed to know
142 * which side of the barrier you're on. Usually, the parent creates
143 * the barrier via barrier_create() and then calls fork() or clone().
144 * Therefore, the FDs are duplicated and the child retains the same
145 * barrier object.
146 *
147 * Both sides need to call barrier_set_role() after fork() or clone()
148 * are done. If this is not done, barriers will not work correctly.
149 *
150 * Note that barriers could be supported without fork() or clone(). However,
151 * this is currently not needed so it hasn't been implemented.
152 */
153 void barrier_set_role(Barrier *b, unsigned role) {
154 assert(b);
155 assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
156 /* make sure this is only called once */
157 assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
158
159 if (role == BARRIER_PARENT)
160 b->pipe[1] = safe_close(b->pipe[1]);
161 else {
162 b->pipe[0] = safe_close(b->pipe[0]);
163
164 /* swap me/them for children */
165 SWAP_TWO(b->me, b->them);
166 }
167 }
168
169 /* places barrier; returns false if we aborted, otherwise true */
170 static bool barrier_write(Barrier *b, uint64_t buf) {
171 ssize_t len;
172
173 /* prevent new sync-points if we already aborted */
174 if (barrier_i_aborted(b))
175 return false;
176
177 assert(b->me >= 0);
178 do {
179 len = write(b->me, &buf, sizeof(buf));
180 } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
181
182 if (len != sizeof(buf))
183 goto error;
184
185 /* lock if we aborted */
186 if (buf >= (uint64_t)BARRIER_ABORTION) {
187 if (barrier_they_aborted(b))
188 b->barriers = BARRIER_WE_ABORTED;
189 else
190 b->barriers = BARRIER_I_ABORTED;
191 } else if (!barrier_is_aborted(b))
192 b->barriers += buf;
193
194 return !barrier_i_aborted(b);
195
196 error:
197 /* If there is an unexpected error, we have to make this fatal. There
198 * is no way we can recover from sync-errors. Therefore, we close the
199 * pipe-ends and treat this as abortion. The other end will notice the
200 * pipe-close and treat it as abortion, too. */
201
202 safe_close_pair(b->pipe);
203 b->barriers = BARRIER_WE_ABORTED;
204 return false;
205 }
206
207 /* waits for barriers; returns false if they aborted, otherwise true */
208 static bool barrier_read(Barrier *b, int64_t comp) {
209 if (barrier_they_aborted(b))
210 return false;
211
212 while (b->barriers > comp) {
213 struct pollfd pfd[2] = {
214 { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
215 .events = POLLHUP },
216 { .fd = b->them,
217 .events = POLLIN }};
218 uint64_t buf;
219 int r;
220
221 r = poll(pfd, 2, -1);
222 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
223 continue;
224 else if (r < 0)
225 goto error;
226
227 if (pfd[1].revents) {
228 ssize_t len;
229
230 /* events on @them signal new data for us */
231 len = read(b->them, &buf, sizeof(buf));
232 if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
233 continue;
234
235 if (len != sizeof(buf))
236 goto error;
237 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
238 /* POLLHUP on the pipe tells us the other side exited.
239 * We treat this as implicit abortion. But we only
240 * handle it if there's no event on the eventfd. This
241 * guarantees that exit-abortions do not overwrite real
242 * barriers. */
243 buf = BARRIER_ABORTION;
244 else
245 continue;
246
247 /* lock if they aborted */
248 if (buf >= (uint64_t)BARRIER_ABORTION) {
249 if (barrier_i_aborted(b))
250 b->barriers = BARRIER_WE_ABORTED;
251 else
252 b->barriers = BARRIER_THEY_ABORTED;
253 } else if (!barrier_is_aborted(b))
254 b->barriers -= buf;
255 }
256
257 return !barrier_they_aborted(b);
258
259 error:
260 /* If there is an unexpected error, we have to make this fatal. There
261 * is no way we can recover from sync-errors. Therefore, we close the
262 * pipe-ends and treat this as abortion. The other end will notice the
263 * pipe-close and treat it as abortion, too. */
264
265 safe_close_pair(b->pipe);
266 b->barriers = BARRIER_WE_ABORTED;
267 return false;
268 }
269
270 /**
271 * barrier_place() - Place a new barrier
272 * @b: barrier object
273 *
274 * This places a new barrier on the barrier object. If either side already
275 * aborted, this is a no-op and returns "false". Otherwise, the barrier is
276 * placed and this returns "true".
277 *
278 * Returns: true if barrier was placed, false if either side aborted.
279 */
280 bool barrier_place(Barrier *b) {
281 assert(b);
282
283 if (barrier_is_aborted(b))
284 return false;
285
286 barrier_write(b, BARRIER_SINGLE);
287 return true;
288 }
289
290 /**
291 * barrier_abort() - Abort the synchronization
292 * @b: barrier object to abort
293 *
294 * This aborts the barrier-synchronization. If barrier_abort() was already
295 * called on this side, this is a no-op. Otherwise, the barrier is put into the
296 * ABORT-state and will stay there. The other side is notified about the
297 * abortion. Any following attempt to place normal barriers or to wait on normal
298 * barriers will return immediately as "false".
299 *
300 * You can wait for the other side to call barrier_abort(), too. Use
301 * barrier_wait_abortion() for that.
302 *
303 * Returns: false if the other side already aborted, true otherwise.
304 */
305 bool barrier_abort(Barrier *b) {
306 assert(b);
307
308 barrier_write(b, BARRIER_ABORTION);
309 return !barrier_they_aborted(b);
310 }
311
312 /**
313 * barrier_wait_next() - Wait for the next barrier of the other side
314 * @b: barrier to operate on
315 *
316 * This waits until the other side places its next barrier. This is independent
317 * of any barrier-links and just waits for any next barrier of the other side.
318 *
319 * If either side aborted, this returns false.
320 *
321 * Returns: false if either side aborted, true otherwise.
322 */
323 bool barrier_wait_next(Barrier *b) {
324 assert(b);
325
326 if (barrier_is_aborted(b))
327 return false;
328
329 barrier_read(b, b->barriers - 1);
330 return !barrier_is_aborted(b);
331 }
332
333 /**
334 * barrier_wait_abortion() - Wait for the other side to abort
335 * @b: barrier to operate on
336 *
337 * This waits until the other side called barrier_abort(). This can be called
338 * regardless whether the local side already called barrier_abort() or not.
339 *
340 * If the other side has already aborted, this returns immediately.
341 *
342 * Returns: false if the local side aborted, true otherwise.
343 */
344 bool barrier_wait_abortion(Barrier *b) {
345 assert(b);
346
347 barrier_read(b, BARRIER_THEY_ABORTED);
348 return !barrier_i_aborted(b);
349 }
350
351 /**
352 * barrier_sync_next() - Wait for the other side to place a next linked barrier
353 * @b: barrier to operate on
354 *
355 * This is like barrier_wait_next() and waits for the other side to call
356 * barrier_place(). However, this only waits for linked barriers. That means, if
357 * the other side already placed more barriers than (or as many as) we did, this
358 * returns immediately instead of waiting.
359 *
360 * If either side aborted, this returns false.
361 *
362 * Returns: false if either side aborted, true otherwise.
363 */
364 bool barrier_sync_next(Barrier *b) {
365 assert(b);
366
367 if (barrier_is_aborted(b))
368 return false;
369
370 barrier_read(b, MAX((int64_t)0, b->barriers - 1));
371 return !barrier_is_aborted(b);
372 }
373
374 /**
375 * barrier_sync() - Wait for the other side to place as many barriers as we did
376 * @b: barrier to operate on
377 *
378 * This is like barrier_sync_next() but waits for the other side to call
379 * barrier_place() as often as we did (in total). If they already placed as many
380 * as we did (or more), this returns immediately instead of waiting.
381 *
382 * If either side aborted, this returns false.
383 *
384 * Returns: false if either side aborted, true otherwise.
385 */
386 bool barrier_sync(Barrier *b) {
387 assert(b);
388
389 if (barrier_is_aborted(b))
390 return false;
391
392 barrier_read(b, 0);
393 return !barrier_is_aborted(b);
394 }