]> git.ipfire.org Git - thirdparty/systemd.git/blame - src/basic/barrier.c
selinux: use raw variants of security_compute_create and setfscreatecon
[thirdparty/systemd.git] / src / basic / barrier.c
CommitLineData
279da1e3
DH
1/***
2 This file is part of systemd.
3
4 Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
5
6 systemd is free software; you can redistribute it and/or modify it
7 under the terms of the GNU Lesser General Public License as published by
8 the Free Software Foundation; either version 2.1 of the License, or
9 (at your option) any later version.
10
11 systemd is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Lesser General Public License for more details.
15
16 You should have received a copy of the GNU Lesser General Public License
17 along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
20#include <errno.h>
21#include <fcntl.h>
279da1e3
DH
22#include <poll.h>
23#include <stdbool.h>
24#include <stdint.h>
279da1e3 25#include <stdlib.h>
279da1e3
DH
26#include <sys/eventfd.h>
27#include <sys/types.h>
28#include <unistd.h>
29
30#include "barrier.h"
3ffd4af2 31#include "fd-util.h"
279da1e3 32#include "macro.h"
279da1e3
DH
33
34/**
35 * Barriers
36 * This barrier implementation provides a simple synchronization method based
37 * on file-descriptors that can safely be used between threads and processes. A
38 * barrier object contains 2 shared counters based on eventfd. Both processes
39 * can now place barriers and wait for the other end to reach an arbitrary or
40 * specific barrier.
41 * Barriers are numbered, so you can either wait for the other end to reach any
42 * barrier or the last barrier that you placed. This way, you can use barriers
43 * for one-way *and* full synchronization. Note that even though barriers are
44 * numbered, these numbers are internal and recycled once both sides reached the
45 * same barrier (implemented as a simple signed counter). It is thus not
46 * possible to address barriers by their ID.
47 *
48 * Barrier-API: Both ends can place as many barriers via barrier_place() as
49 * they want and each pair of barriers on both sides will be implicitly linked.
50 * Each side can use the barrier_wait/sync_*() family of calls to wait for the
51 * other side to place a specific barrier. barrier_wait_next() waits until the
52 * other side calls barrier_place(). No links between the barriers are
53 * considered and this simply serves as most basic asynchronous barrier.
54 * barrier_sync_next() is like barrier_wait_next() and waits for the other side
55 * to place their next barrier via barrier_place(). However, it only waits for
56 * barriers that are linked to a barrier we already placed. If the other side
57 * already placed more barriers than we did, barrier_sync_next() returns
58 * immediately.
59 * barrier_sync() extends barrier_sync_next() and waits until the other end
60 * placed as many barriers via barrier_place() as we did. If they already placed
61 * as many as we did (or more), it returns immediately.
62 *
63 * In addition to basic barriers, an abortion event is available.
64 * barrier_abort() places an abortion event that cannot be undone. An abortion
65 * immediately cancels all placed barriers and replaces them. Any running and
66 * following wait/sync call besides barrier_wait_abortion() will immediately
67 * return false on both sides (otherwise, they always return true).
68 * barrier_abort() can be called multiple times on both ends and will be a
69 * no-op if already called on this side.
70 * barrier_wait_abortion() can be used to wait for the other side to call
71 * barrier_abort() and is the only wait/sync call that does not return
72 * immediately if we aborted ourselves. It only returns once the other side
73 * called barrier_abort().
74 *
75 * Barriers can be used for in-process and inter-process synchronization.
76 * However, for in-process synchronization you could just use mutexes.
77 * Therefore, main target is IPC and we require both sides to *not* share the FD
78 * table. If that's given, barriers provide target tracking: If the remote side
79 * exit()s, an abortion event is implicitly queued on the other side. This way,
80 * a sync/wait call will be woken up if the remote side crashed or exited
81 * unexpectedly. However, note that these abortion events are only queued if the
82 * barrier-queue has been drained. Therefore, it is safe to place a barrier and
83 * exit. The other side can safely wait on the barrier even though the exit
84 * queued an abortion event. Usually, the abortion event would overwrite the
85 * barrier, however, that's not true for exit-abortion events. Those are only
86 * queued if the barrier-queue is drained (thus, the receiving side has placed
87 * more barriers than the remote side).
88 */
89
90/**
7566e267 91 * barrier_create() - Initialize a barrier object
279da1e3
DH
92 * @obj: barrier to initialize
93 *
94 * This initializes a barrier object. The caller is responsible of allocating
95 * the memory and keeping it valid. The memory does not have to be zeroed
96 * beforehand.
97 * Two eventfd objects are allocated for each barrier. If allocation fails, an
98 * error is returned.
99 *
100 * If this function fails, the barrier is reset to an invalid state so it is
101 * safe to call barrier_destroy() on the object regardless whether the
102 * initialization succeeded or not.
103 *
104 * The caller is responsible to destroy the object via barrier_destroy() before
105 * releasing the underlying memory.
106 *
107 * Returns: 0 on success, negative error code on failure.
108 */
7566e267 109int barrier_create(Barrier *b) {
fc808616
DH
110 _cleanup_(barrier_destroyp) Barrier *staging = b;
111 int r;
112
7566e267 113 assert(b);
279da1e3 114
fc808616
DH
115 b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
116 if (b->me < 0)
117 return -errno;
118
119 b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
120 if (b->them < 0)
121 return -errno;
122
123 r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
124 if (r < 0)
279da1e3
DH
125 return -errno;
126
fc808616 127 staging = NULL;
279da1e3
DH
128 return 0;
129}
130
131/**
132 * barrier_destroy() - Destroy a barrier object
133 * @b: barrier to destroy or NULL
134 *
7566e267
ZJS
135 * This destroys a barrier object that has previously been passed to
136 * barrier_create(). The object is released and reset to invalid
137 * state. Therefore, it is safe to call barrier_destroy() multiple
138 * times or even if barrier_create() failed. However, barrier must be
ff9b60f3 139 * always initialized with BARRIER_NULL.
279da1e3
DH
140 *
141 * If @b is NULL, this is a no-op.
142 */
143void barrier_destroy(Barrier *b) {
144 if (!b)
145 return;
146
279da1e3
DH
147 b->me = safe_close(b->me);
148 b->them = safe_close(b->them);
7566e267 149 safe_close_pair(b->pipe);
279da1e3
DH
150 b->barriers = 0;
151}
152
153/**
154 * barrier_set_role() - Set the local role of the barrier
155 * @b: barrier to operate on
156 * @role: role to set on the barrier
157 *
7566e267
ZJS
158 * This sets the roles on a barrier object. This is needed to know
159 * which side of the barrier you're on. Usually, the parent creates
160 * the barrier via barrier_create() and then calls fork() or clone().
161 * Therefore, the FDs are duplicated and the child retains the same
162 * barrier object.
279da1e3 163 *
7566e267
ZJS
164 * Both sides need to call barrier_set_role() after fork() or clone()
165 * are done. If this is not done, barriers will not work correctly.
279da1e3
DH
166 *
167 * Note that barriers could be supported without fork() or clone(). However,
168 * this is currently not needed so it hasn't been implemented.
169 */
170void barrier_set_role(Barrier *b, unsigned int role) {
171 int fd;
172
173 assert(b);
174 assert(role == BARRIER_PARENT || role == BARRIER_CHILD);
175 /* make sure this is only called once */
3f7f1fad 176 assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
279da1e3 177
7566e267 178 if (role == BARRIER_PARENT)
279da1e3 179 b->pipe[1] = safe_close(b->pipe[1]);
7566e267 180 else {
279da1e3
DH
181 b->pipe[0] = safe_close(b->pipe[0]);
182
183 /* swap me/them for children */
184 fd = b->me;
185 b->me = b->them;
186 b->them = fd;
187 }
188}
189
190/* places barrier; returns false if we aborted, otherwise true */
191static bool barrier_write(Barrier *b, uint64_t buf) {
192 ssize_t len;
193
194 /* prevent new sync-points if we already aborted */
195 if (barrier_i_aborted(b))
196 return false;
197
5c687f08 198 assert(b->me >= 0);
279da1e3
DH
199 do {
200 len = write(b->me, &buf, sizeof(buf));
7566e267 201 } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
279da1e3
DH
202
203 if (len != sizeof(buf))
204 goto error;
205
206 /* lock if we aborted */
207 if (buf >= (uint64_t)BARRIER_ABORTION) {
208 if (barrier_they_aborted(b))
209 b->barriers = BARRIER_WE_ABORTED;
210 else
211 b->barriers = BARRIER_I_ABORTED;
7566e267 212 } else if (!barrier_is_aborted(b))
279da1e3 213 b->barriers += buf;
279da1e3
DH
214
215 return !barrier_i_aborted(b);
216
217error:
218 /* If there is an unexpected error, we have to make this fatal. There
219 * is no way we can recover from sync-errors. Therefore, we close the
220 * pipe-ends and treat this as abortion. The other end will notice the
221 * pipe-close and treat it as abortion, too. */
222
7566e267 223 safe_close_pair(b->pipe);
279da1e3
DH
224 b->barriers = BARRIER_WE_ABORTED;
225 return false;
226}
227
228/* waits for barriers; returns false if they aborted, otherwise true */
229static bool barrier_read(Barrier *b, int64_t comp) {
279da1e3
DH
230 if (barrier_they_aborted(b))
231 return false;
232
233 while (b->barriers > comp) {
7566e267
ZJS
234 struct pollfd pfd[2] = {
235 { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
236 .events = POLLHUP },
237 { .fd = b->them,
238 .events = POLLIN }};
239 uint64_t buf;
240 int r;
279da1e3
DH
241
242 r = poll(pfd, 2, -1);
7566e267 243 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
279da1e3
DH
244 continue;
245 else if (r < 0)
246 goto error;
247
248 if (pfd[1].revents) {
7566e267
ZJS
249 ssize_t len;
250
251 /* events on @them signal new data for us */
279da1e3 252 len = read(b->them, &buf, sizeof(buf));
7566e267 253 if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
279da1e3
DH
254 continue;
255
256 if (len != sizeof(buf))
257 goto error;
7566e267 258 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
279da1e3
DH
259 /* POLLHUP on the pipe tells us the other side exited.
260 * We treat this as implicit abortion. But we only
261 * handle it if there's no event on the eventfd. This
262 * guarantees that exit-abortions do not overwrite real
263 * barriers. */
264 buf = BARRIER_ABORTION;
53290ee3
DH
265 else
266 continue;
279da1e3
DH
267
268 /* lock if they aborted */
269 if (buf >= (uint64_t)BARRIER_ABORTION) {
270 if (barrier_i_aborted(b))
271 b->barriers = BARRIER_WE_ABORTED;
272 else
273 b->barriers = BARRIER_THEY_ABORTED;
7566e267 274 } else if (!barrier_is_aborted(b))
279da1e3 275 b->barriers -= buf;
279da1e3
DH
276 }
277
278 return !barrier_they_aborted(b);
279
280error:
281 /* If there is an unexpected error, we have to make this fatal. There
282 * is no way we can recover from sync-errors. Therefore, we close the
283 * pipe-ends and treat this as abortion. The other end will notice the
284 * pipe-close and treat it as abortion, too. */
285
7566e267 286 safe_close_pair(b->pipe);
279da1e3
DH
287 b->barriers = BARRIER_WE_ABORTED;
288 return false;
289}
290
291/**
292 * barrier_place() - Place a new barrier
293 * @b: barrier object
294 *
295 * This places a new barrier on the barrier object. If either side already
296 * aborted, this is a no-op and returns "false". Otherwise, the barrier is
297 * placed and this returns "true".
298 *
299 * Returns: true if barrier was placed, false if either side aborted.
300 */
301bool barrier_place(Barrier *b) {
302 assert(b);
303
304 if (barrier_is_aborted(b))
305 return false;
306
307 barrier_write(b, BARRIER_SINGLE);
308 return true;
309}
310
311/**
312 * barrier_abort() - Abort the synchronization
313 * @b: barrier object to abort
314 *
315 * This aborts the barrier-synchronization. If barrier_abort() was already
316 * called on this side, this is a no-op. Otherwise, the barrier is put into the
317 * ABORT-state and will stay there. The other side is notified about the
318 * abortion. Any following attempt to place normal barriers or to wait on normal
319 * barriers will return immediately as "false".
320 *
321 * You can wait for the other side to call barrier_abort(), too. Use
322 * barrier_wait_abortion() for that.
323 *
324 * Returns: false if the other side already aborted, true otherwise.
325 */
326bool barrier_abort(Barrier *b) {
327 assert(b);
328
329 barrier_write(b, BARRIER_ABORTION);
330 return !barrier_they_aborted(b);
331}
332
333/**
334 * barrier_wait_next() - Wait for the next barrier of the other side
335 * @b: barrier to operate on
336 *
337 * This waits until the other side places its next barrier. This is independent
338 * of any barrier-links and just waits for any next barrier of the other side.
339 *
340 * If either side aborted, this returns false.
341 *
342 * Returns: false if either side aborted, true otherwise.
343 */
344bool barrier_wait_next(Barrier *b) {
345 assert(b);
346
347 if (barrier_is_aborted(b))
348 return false;
349
350 barrier_read(b, b->barriers - 1);
351 return !barrier_is_aborted(b);
352}
353
354/**
355 * barrier_wait_abortion() - Wait for the other side to abort
356 * @b: barrier to operate on
357 *
358 * This waits until the other side called barrier_abort(). This can be called
359 * regardless whether the local side already called barrier_abort() or not.
360 *
361 * If the other side has already aborted, this returns immediately.
362 *
363 * Returns: false if the local side aborted, true otherwise.
364 */
365bool barrier_wait_abortion(Barrier *b) {
366 assert(b);
367
368 barrier_read(b, BARRIER_THEY_ABORTED);
369 return !barrier_i_aborted(b);
370}
371
372/**
373 * barrier_sync_next() - Wait for the other side to place a next linked barrier
374 * @b: barrier to operate on
375 *
376 * This is like barrier_wait_next() and waits for the other side to call
377 * barrier_place(). However, this only waits for linked barriers. That means, if
378 * the other side already placed more barriers than (or as much as) we did, this
379 * returns immediately instead of waiting.
380 *
381 * If either side aborted, this returns false.
382 *
383 * Returns: false if either side aborted, true otherwise.
384 */
385bool barrier_sync_next(Barrier *b) {
386 assert(b);
387
388 if (barrier_is_aborted(b))
389 return false;
390
391 barrier_read(b, MAX((int64_t)0, b->barriers - 1));
392 return !barrier_is_aborted(b);
393}
394
395/**
396 * barrier_sync() - Wait for the other side to place as many barriers as we did
397 * @b: barrier to operate on
398 *
399 * This is like barrier_sync_next() but waits for the other side to call
400 * barrier_place() as often as we did (in total). If they already placed as much
401 * as we did (or more), this returns immediately instead of waiting.
402 *
403 * If either side aborted, this returns false.
404 *
405 * Returns: false if either side aborted, true otherwise.
406 */
407bool barrier_sync(Barrier *b) {
408 assert(b);
409
410 if (barrier_is_aborted(b))
411 return false;
412
413 barrier_read(b, 0);
414 return !barrier_is_aborted(b);
415}