]> git.ipfire.org Git - thirdparty/systemd.git/blob - src/basic/barrier.c
pkgconfig: define variables relative to ${prefix}/${rootprefix}/${sysconfdir}
[thirdparty/systemd.git] / src / basic / barrier.c
1 /* SPDX-License-Identifier: LGPL-2.1+ */
2
3 #include <errno.h>
4 #include <fcntl.h>
5 #include <poll.h>
6 #include <stdbool.h>
7 #include <stdint.h>
8 #include <stdlib.h>
9 #include <sys/eventfd.h>
10 #include <sys/types.h>
11 #include <unistd.h>
12
13 #include "barrier.h"
14 #include "fd-util.h"
15 #include "macro.h"
16
17 /**
18 * Barriers
19 * This barrier implementation provides a simple synchronization method based
20 * on file-descriptors that can safely be used between threads and processes. A
21 * barrier object contains 2 shared counters based on eventfd. Both processes
22 * can now place barriers and wait for the other end to reach a random or
23 * specific barrier.
24 * Barriers are numbered, so you can either wait for the other end to reach any
25 * barrier or the last barrier that you placed. This way, you can use barriers
26 * for one-way *and* full synchronization. Note that even though barriers are
27 * numbered, these numbers are internal and recycled once both sides reached the
28 * same barrier (implemented as a simple signed counter). It is thus not
29 * possible to address barriers by their ID.
30 *
31 * Barrier-API: Both ends can place as many barriers via barrier_place() as
32 * they want and each pair of barriers on both sides will be implicitly linked.
33 * Each side can use the barrier_wait/sync_*() family of calls to wait for the
34 * other side to place a specific barrier. barrier_wait_next() waits until the
35 * other side calls barrier_place(). No links between the barriers are
36 * considered and this simply serves as most basic asynchronous barrier.
37 * barrier_sync_next() is like barrier_wait_next() and waits for the other side
38 * to place their next barrier via barrier_place(). However, it only waits for
39 * barriers that are linked to a barrier we already placed. If the other side
40 * already placed more barriers than we did, barrier_sync_next() returns
41 * immediately.
42 * barrier_sync() extends barrier_sync_next() and waits until the other end
43 * placed as many barriers via barrier_place() as we did. If they already placed
44 * as many as we did (or more), it returns immediately.
45 *
46 * Additionally to basic barriers, an abortion event is available.
47 * barrier_abort() places an abortion event that cannot be undone. An abortion
48 * immediately cancels all placed barriers and replaces them. Any running and
49 * following wait/sync call besides barrier_wait_abortion() will immediately
50 * return false on both sides (otherwise, they always return true).
51 * barrier_abort() can be called multiple times on both ends and will be a
52 * no-op if already called on this side.
53 * barrier_wait_abortion() can be used to wait for the other side to call
54 * barrier_abort() and is the only wait/sync call that does not return
55 * immediately if we aborted ourselves. It only returns once the other side
56 * called barrier_abort().
57 *
58 * Barriers can be used for in-process and inter-process synchronization.
59 * However, for in-process synchronization you could just use mutexes.
60 * Therefore, main target is IPC and we require both sides to *not* share the FD
61 * table. If that's given, barriers provide target tracking: If the remote side
62 * exit()s, an abortion event is implicitly queued on the other side. This way,
63 * a sync/wait call will be woken up if the remote side crashed or exited
64 * unexpectedly. However, note that these abortion events are only queued if the
65 * barrier-queue has been drained. Therefore, it is safe to place a barrier and
66 * exit. The other side can safely wait on the barrier even though the exit
67 * queued an abortion event. Usually, the abortion event would overwrite the
68 * barrier, however, that's not true for exit-abortion events. Those are only
69 * queued if the barrier-queue is drained (thus, the receiving side has placed
70 * more barriers than the remote side).
71 */
72
73 /**
74 * barrier_create() - Initialize a barrier object
75 * @obj: barrier to initialize
76 *
77 * This initializes a barrier object. The caller is responsible of allocating
78 * the memory and keeping it valid. The memory does not have to be zeroed
79 * beforehand.
80 * Two eventfd objects are allocated for each barrier. If allocation fails, an
81 * error is returned.
82 *
83 * If this function fails, the barrier is reset to an invalid state so it is
84 * safe to call barrier_destroy() on the object regardless whether the
85 * initialization succeeded or not.
86 *
87 * The caller is responsible to destroy the object via barrier_destroy() before
88 * releasing the underlying memory.
89 *
90 * Returns: 0 on success, negative error code on failure.
91 */
92 int barrier_create(Barrier *b) {
93 _cleanup_(barrier_destroyp) Barrier *staging = b;
94 int r;
95
96 assert(b);
97
98 b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
99 if (b->me < 0)
100 return -errno;
101
102 b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
103 if (b->them < 0)
104 return -errno;
105
106 r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
107 if (r < 0)
108 return -errno;
109
110 staging = NULL;
111 return 0;
112 }
113
114 /**
115 * barrier_destroy() - Destroy a barrier object
116 * @b: barrier to destroy or NULL
117 *
118 * This destroys a barrier object that has previously been passed to
119 * barrier_create(). The object is released and reset to invalid
120 * state. Therefore, it is safe to call barrier_destroy() multiple
121 * times or even if barrier_create() failed. However, barrier must be
122 * always initialized with BARRIER_NULL.
123 *
124 * If @b is NULL, this is a no-op.
125 */
126 void barrier_destroy(Barrier *b) {
127 if (!b)
128 return;
129
130 b->me = safe_close(b->me);
131 b->them = safe_close(b->them);
132 safe_close_pair(b->pipe);
133 b->barriers = 0;
134 }
135
136 /**
137 * barrier_set_role() - Set the local role of the barrier
138 * @b: barrier to operate on
139 * @role: role to set on the barrier
140 *
141 * This sets the roles on a barrier object. This is needed to know
142 * which side of the barrier you're on. Usually, the parent creates
143 * the barrier via barrier_create() and then calls fork() or clone().
144 * Therefore, the FDs are duplicated and the child retains the same
145 * barrier object.
146 *
147 * Both sides need to call barrier_set_role() after fork() or clone()
148 * are done. If this is not done, barriers will not work correctly.
149 *
150 * Note that barriers could be supported without fork() or clone(). However,
151 * this is currently not needed so it hasn't been implemented.
152 */
153 void barrier_set_role(Barrier *b, unsigned role) {
154 int fd;
155
156 assert(b);
157 assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
158 /* make sure this is only called once */
159 assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
160
161 if (role == BARRIER_PARENT)
162 b->pipe[1] = safe_close(b->pipe[1]);
163 else {
164 b->pipe[0] = safe_close(b->pipe[0]);
165
166 /* swap me/them for children */
167 fd = b->me;
168 b->me = b->them;
169 b->them = fd;
170 }
171 }
172
173 /* places barrier; returns false if we aborted, otherwise true */
174 static bool barrier_write(Barrier *b, uint64_t buf) {
175 ssize_t len;
176
177 /* prevent new sync-points if we already aborted */
178 if (barrier_i_aborted(b))
179 return false;
180
181 assert(b->me >= 0);
182 do {
183 len = write(b->me, &buf, sizeof(buf));
184 } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
185
186 if (len != sizeof(buf))
187 goto error;
188
189 /* lock if we aborted */
190 if (buf >= (uint64_t)BARRIER_ABORTION) {
191 if (barrier_they_aborted(b))
192 b->barriers = BARRIER_WE_ABORTED;
193 else
194 b->barriers = BARRIER_I_ABORTED;
195 } else if (!barrier_is_aborted(b))
196 b->barriers += buf;
197
198 return !barrier_i_aborted(b);
199
200 error:
201 /* If there is an unexpected error, we have to make this fatal. There
202 * is no way we can recover from sync-errors. Therefore, we close the
203 * pipe-ends and treat this as abortion. The other end will notice the
204 * pipe-close and treat it as abortion, too. */
205
206 safe_close_pair(b->pipe);
207 b->barriers = BARRIER_WE_ABORTED;
208 return false;
209 }
210
211 /* waits for barriers; returns false if they aborted, otherwise true */
212 static bool barrier_read(Barrier *b, int64_t comp) {
213 if (barrier_they_aborted(b))
214 return false;
215
216 while (b->barriers > comp) {
217 struct pollfd pfd[2] = {
218 { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
219 .events = POLLHUP },
220 { .fd = b->them,
221 .events = POLLIN }};
222 uint64_t buf;
223 int r;
224
225 r = poll(pfd, 2, -1);
226 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
227 continue;
228 else if (r < 0)
229 goto error;
230
231 if (pfd[1].revents) {
232 ssize_t len;
233
234 /* events on @them signal new data for us */
235 len = read(b->them, &buf, sizeof(buf));
236 if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
237 continue;
238
239 if (len != sizeof(buf))
240 goto error;
241 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
242 /* POLLHUP on the pipe tells us the other side exited.
243 * We treat this as implicit abortion. But we only
244 * handle it if there's no event on the eventfd. This
245 * guarantees that exit-abortions do not overwrite real
246 * barriers. */
247 buf = BARRIER_ABORTION;
248 else
249 continue;
250
251 /* lock if they aborted */
252 if (buf >= (uint64_t)BARRIER_ABORTION) {
253 if (barrier_i_aborted(b))
254 b->barriers = BARRIER_WE_ABORTED;
255 else
256 b->barriers = BARRIER_THEY_ABORTED;
257 } else if (!barrier_is_aborted(b))
258 b->barriers -= buf;
259 }
260
261 return !barrier_they_aborted(b);
262
263 error:
264 /* If there is an unexpected error, we have to make this fatal. There
265 * is no way we can recover from sync-errors. Therefore, we close the
266 * pipe-ends and treat this as abortion. The other end will notice the
267 * pipe-close and treat it as abortion, too. */
268
269 safe_close_pair(b->pipe);
270 b->barriers = BARRIER_WE_ABORTED;
271 return false;
272 }
273
274 /**
275 * barrier_place() - Place a new barrier
276 * @b: barrier object
277 *
278 * This places a new barrier on the barrier object. If either side already
279 * aborted, this is a no-op and returns "false". Otherwise, the barrier is
280 * placed and this returns "true".
281 *
282 * Returns: true if barrier was placed, false if either side aborted.
283 */
284 bool barrier_place(Barrier *b) {
285 assert(b);
286
287 if (barrier_is_aborted(b))
288 return false;
289
290 barrier_write(b, BARRIER_SINGLE);
291 return true;
292 }
293
294 /**
295 * barrier_abort() - Abort the synchronization
296 * @b: barrier object to abort
297 *
298 * This aborts the barrier-synchronization. If barrier_abort() was already
299 * called on this side, this is a no-op. Otherwise, the barrier is put into the
300 * ABORT-state and will stay there. The other side is notified about the
301 * abortion. Any following attempt to place normal barriers or to wait on normal
302 * barriers will return immediately as "false".
303 *
304 * You can wait for the other side to call barrier_abort(), too. Use
305 * barrier_wait_abortion() for that.
306 *
307 * Returns: false if the other side already aborted, true otherwise.
308 */
309 bool barrier_abort(Barrier *b) {
310 assert(b);
311
312 barrier_write(b, BARRIER_ABORTION);
313 return !barrier_they_aborted(b);
314 }
315
316 /**
317 * barrier_wait_next() - Wait for the next barrier of the other side
318 * @b: barrier to operate on
319 *
320 * This waits until the other side places its next barrier. This is independent
321 * of any barrier-links and just waits for any next barrier of the other side.
322 *
323 * If either side aborted, this returns false.
324 *
325 * Returns: false if either side aborted, true otherwise.
326 */
327 bool barrier_wait_next(Barrier *b) {
328 assert(b);
329
330 if (barrier_is_aborted(b))
331 return false;
332
333 barrier_read(b, b->barriers - 1);
334 return !barrier_is_aborted(b);
335 }
336
337 /**
338 * barrier_wait_abortion() - Wait for the other side to abort
339 * @b: barrier to operate on
340 *
341 * This waits until the other side called barrier_abort(). This can be called
342 * regardless whether the local side already called barrier_abort() or not.
343 *
344 * If the other side has already aborted, this returns immediately.
345 *
346 * Returns: false if the local side aborted, true otherwise.
347 */
348 bool barrier_wait_abortion(Barrier *b) {
349 assert(b);
350
351 barrier_read(b, BARRIER_THEY_ABORTED);
352 return !barrier_i_aborted(b);
353 }
354
355 /**
356 * barrier_sync_next() - Wait for the other side to place a next linked barrier
357 * @b: barrier to operate on
358 *
359 * This is like barrier_wait_next() and waits for the other side to call
360 * barrier_place(). However, this only waits for linked barriers. That means, if
361 * the other side already placed more barriers than (or as much as) we did, this
362 * returns immediately instead of waiting.
363 *
364 * If either side aborted, this returns false.
365 *
366 * Returns: false if either side aborted, true otherwise.
367 */
368 bool barrier_sync_next(Barrier *b) {
369 assert(b);
370
371 if (barrier_is_aborted(b))
372 return false;
373
374 barrier_read(b, MAX((int64_t)0, b->barriers - 1));
375 return !barrier_is_aborted(b);
376 }
377
378 /**
379 * barrier_sync() - Wait for the other side to place as many barriers as we did
380 * @b: barrier to operate on
381 *
382 * This is like barrier_sync_next() but waits for the other side to call
383 * barrier_place() as often as we did (in total). If they already placed as much
384 * as we did (or more), this returns immediately instead of waiting.
385 *
386 * If either side aborted, this returns false.
387 *
388 * Returns: false if either side aborted, true otherwise.
389 */
390 bool barrier_sync(Barrier *b) {
391 assert(b);
392
393 if (barrier_is_aborted(b))
394 return false;
395
396 barrier_read(b, 0);
397 return !barrier_is_aborted(b);
398 }