]>
Commit | Line | Data |
---|---|---|
53e1b683 | 1 | /* SPDX-License-Identifier: LGPL-2.1+ */ |
279da1e3 DH |
2 | /*** |
3 | This file is part of systemd. | |
4 | ||
5 | Copyright 2014 David Herrmann <dh.herrmann@gmail.com> | |
6 | ||
7 | systemd is free software; you can redistribute it and/or modify it | |
8 | under the terms of the GNU Lesser General Public License as published by | |
9 | the Free Software Foundation; either version 2.1 of the License, or | |
10 | (at your option) any later version. | |
11 | ||
12 | systemd is distributed in the hope that it will be useful, but | |
13 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 | Lesser General Public License for more details. | |
16 | ||
17 | You should have received a copy of the GNU Lesser General Public License | |
18 | along with systemd; If not, see <http://www.gnu.org/licenses/>. | |
19 | ***/ | |
20 | ||
21 | #include <errno.h> | |
22 | #include <fcntl.h> | |
279da1e3 DH |
23 | #include <poll.h> |
24 | #include <stdbool.h> | |
25 | #include <stdint.h> | |
279da1e3 | 26 | #include <stdlib.h> |
279da1e3 DH |
27 | #include <sys/eventfd.h> |
28 | #include <sys/types.h> | |
29 | #include <unistd.h> | |
30 | ||
31 | #include "barrier.h" | |
3ffd4af2 | 32 | #include "fd-util.h" |
279da1e3 | 33 | #include "macro.h" |
279da1e3 DH |
34 | |
35 | /** | |
36 | * Barriers | |
37 | * This barrier implementation provides a simple synchronization method based | |
38 | * on file-descriptors that can safely be used between threads and processes. A | |
39 | * barrier object contains 2 shared counters based on eventfd. Both processes | |
40 | * can now place barriers and wait for the other end to reach a random or | |
41 | * specific barrier. | |
42 | * Barriers are numbered, so you can either wait for the other end to reach any | |
43 | * barrier or the last barrier that you placed. This way, you can use barriers | |
44 | * for one-way *and* full synchronization. Note that even though barriers are | |
45 | * numbered, these numbers are internal and recycled once both sides reached the | |
46 | * same barrier (implemented as a simple signed counter). It is thus not | |
47 | * possible to address barriers by their ID. | |
48 | * | |
49 | * Barrier-API: Both ends can place as many barriers via barrier_place() as | |
50 | * they want and each pair of barriers on both sides will be implicitly linked. | |
51 | * Each side can use the barrier_wait/sync_*() family of calls to wait for the | |
52 | * other side to place a specific barrier. barrier_wait_next() waits until the | |
53 | * other side calls barrier_place(). No links between the barriers are | |
54 | * considered and this simply serves as most basic asynchronous barrier. | |
55 | * barrier_sync_next() is like barrier_wait_next() and waits for the other side | |
56 | * to place their next barrier via barrier_place(). However, it only waits for | |
57 | * barriers that are linked to a barrier we already placed. If the other side | |
58 | * already placed more barriers than we did, barrier_sync_next() returns | |
59 | * immediately. | |
60 | * barrier_sync() extends barrier_sync_next() and waits until the other end | |
61 | * placed as many barriers via barrier_place() as we did. If they already placed | |
62 | * as many as we did (or more), it returns immediately. | |
63 | * | |
64 | * Additionally to basic barriers, an abortion event is available. | |
65 | * barrier_abort() places an abortion event that cannot be undone. An abortion | |
66 | * immediately cancels all placed barriers and replaces them. Any running and | |
67 | * following wait/sync call besides barrier_wait_abortion() will immediately | |
68 | * return false on both sides (otherwise, they always return true). | |
69 | * barrier_abort() can be called multiple times on both ends and will be a | |
70 | * no-op if already called on this side. | |
71 | * barrier_wait_abortion() can be used to wait for the other side to call | |
72 | * barrier_abort() and is the only wait/sync call that does not return | |
73 | * immediately if we aborted ourselves. It only returns once the other side | |
74 | * called barrier_abort(). | |
75 | * | |
76 | * Barriers can be used for in-process and inter-process synchronization. | |
77 | * However, for in-process synchronization you could just use mutexes. | |
78 | * Therefore, main target is IPC and we require both sides to *not* share the FD | |
79 | * table. If that's given, barriers provide target tracking: If the remote side | |
80 | * exit()s, an abortion event is implicitly queued on the other side. This way, | |
81 | * a sync/wait call will be woken up if the remote side crashed or exited | |
82 | * unexpectedly. However, note that these abortion events are only queued if the | |
83 | * barrier-queue has been drained. Therefore, it is safe to place a barrier and | |
84 | * exit. The other side can safely wait on the barrier even though the exit | |
85 | * queued an abortion event. Usually, the abortion event would overwrite the | |
86 | * barrier, however, that's not true for exit-abortion events. Those are only | |
87 | * queued if the barrier-queue is drained (thus, the receiving side has placed | |
88 | * more barriers than the remote side). | |
89 | */ | |
90 | ||
91 | /** | |
7566e267 | 92 | * barrier_create() - Initialize a barrier object |
279da1e3 DH |
93 | * @obj: barrier to initialize |
94 | * | |
95 | * This initializes a barrier object. The caller is responsible of allocating | |
96 | * the memory and keeping it valid. The memory does not have to be zeroed | |
97 | * beforehand. | |
98 | * Two eventfd objects are allocated for each barrier. If allocation fails, an | |
99 | * error is returned. | |
100 | * | |
101 | * If this function fails, the barrier is reset to an invalid state so it is | |
102 | * safe to call barrier_destroy() on the object regardless whether the | |
103 | * initialization succeeded or not. | |
104 | * | |
105 | * The caller is responsible to destroy the object via barrier_destroy() before | |
106 | * releasing the underlying memory. | |
107 | * | |
108 | * Returns: 0 on success, negative error code on failure. | |
109 | */ | |
7566e267 | 110 | int barrier_create(Barrier *b) { |
fc808616 DH |
111 | _cleanup_(barrier_destroyp) Barrier *staging = b; |
112 | int r; | |
113 | ||
7566e267 | 114 | assert(b); |
279da1e3 | 115 | |
fc808616 DH |
116 | b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); |
117 | if (b->me < 0) | |
118 | return -errno; | |
119 | ||
120 | b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK); | |
121 | if (b->them < 0) | |
122 | return -errno; | |
123 | ||
124 | r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK); | |
125 | if (r < 0) | |
279da1e3 DH |
126 | return -errno; |
127 | ||
fc808616 | 128 | staging = NULL; |
279da1e3 DH |
129 | return 0; |
130 | } | |
131 | ||
132 | /** | |
133 | * barrier_destroy() - Destroy a barrier object | |
134 | * @b: barrier to destroy or NULL | |
135 | * | |
7566e267 ZJS |
136 | * This destroys a barrier object that has previously been passed to |
137 | * barrier_create(). The object is released and reset to invalid | |
138 | * state. Therefore, it is safe to call barrier_destroy() multiple | |
139 | * times or even if barrier_create() failed. However, barrier must be | |
ff9b60f3 | 140 | * always initialized with BARRIER_NULL. |
279da1e3 DH |
141 | * |
142 | * If @b is NULL, this is a no-op. | |
143 | */ | |
144 | void barrier_destroy(Barrier *b) { | |
145 | if (!b) | |
146 | return; | |
147 | ||
279da1e3 DH |
148 | b->me = safe_close(b->me); |
149 | b->them = safe_close(b->them); | |
7566e267 | 150 | safe_close_pair(b->pipe); |
279da1e3 DH |
151 | b->barriers = 0; |
152 | } | |
153 | ||
154 | /** | |
155 | * barrier_set_role() - Set the local role of the barrier | |
156 | * @b: barrier to operate on | |
157 | * @role: role to set on the barrier | |
158 | * | |
7566e267 ZJS |
159 | * This sets the roles on a barrier object. This is needed to know |
160 | * which side of the barrier you're on. Usually, the parent creates | |
161 | * the barrier via barrier_create() and then calls fork() or clone(). | |
162 | * Therefore, the FDs are duplicated and the child retains the same | |
163 | * barrier object. | |
279da1e3 | 164 | * |
7566e267 ZJS |
165 | * Both sides need to call barrier_set_role() after fork() or clone() |
166 | * are done. If this is not done, barriers will not work correctly. | |
279da1e3 DH |
167 | * |
168 | * Note that barriers could be supported without fork() or clone(). However, | |
169 | * this is currently not needed so it hasn't been implemented. | |
170 | */ | |
171 | void barrier_set_role(Barrier *b, unsigned int role) { | |
172 | int fd; | |
173 | ||
174 | assert(b); | |
3742095b | 175 | assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD)); |
279da1e3 | 176 | /* make sure this is only called once */ |
3f7f1fad | 177 | assert(b->pipe[0] >= 0 && b->pipe[1] >= 0); |
279da1e3 | 178 | |
7566e267 | 179 | if (role == BARRIER_PARENT) |
279da1e3 | 180 | b->pipe[1] = safe_close(b->pipe[1]); |
7566e267 | 181 | else { |
279da1e3 DH |
182 | b->pipe[0] = safe_close(b->pipe[0]); |
183 | ||
184 | /* swap me/them for children */ | |
185 | fd = b->me; | |
186 | b->me = b->them; | |
187 | b->them = fd; | |
188 | } | |
189 | } | |
190 | ||
191 | /* places barrier; returns false if we aborted, otherwise true */ | |
192 | static bool barrier_write(Barrier *b, uint64_t buf) { | |
193 | ssize_t len; | |
194 | ||
195 | /* prevent new sync-points if we already aborted */ | |
196 | if (barrier_i_aborted(b)) | |
197 | return false; | |
198 | ||
5c687f08 | 199 | assert(b->me >= 0); |
279da1e3 DH |
200 | do { |
201 | len = write(b->me, &buf, sizeof(buf)); | |
7566e267 | 202 | } while (len < 0 && IN_SET(errno, EAGAIN, EINTR)); |
279da1e3 DH |
203 | |
204 | if (len != sizeof(buf)) | |
205 | goto error; | |
206 | ||
207 | /* lock if we aborted */ | |
208 | if (buf >= (uint64_t)BARRIER_ABORTION) { | |
209 | if (barrier_they_aborted(b)) | |
210 | b->barriers = BARRIER_WE_ABORTED; | |
211 | else | |
212 | b->barriers = BARRIER_I_ABORTED; | |
7566e267 | 213 | } else if (!barrier_is_aborted(b)) |
279da1e3 | 214 | b->barriers += buf; |
279da1e3 DH |
215 | |
216 | return !barrier_i_aborted(b); | |
217 | ||
218 | error: | |
219 | /* If there is an unexpected error, we have to make this fatal. There | |
220 | * is no way we can recover from sync-errors. Therefore, we close the | |
221 | * pipe-ends and treat this as abortion. The other end will notice the | |
222 | * pipe-close and treat it as abortion, too. */ | |
223 | ||
7566e267 | 224 | safe_close_pair(b->pipe); |
279da1e3 DH |
225 | b->barriers = BARRIER_WE_ABORTED; |
226 | return false; | |
227 | } | |
228 | ||
229 | /* waits for barriers; returns false if they aborted, otherwise true */ | |
230 | static bool barrier_read(Barrier *b, int64_t comp) { | |
279da1e3 DH |
231 | if (barrier_they_aborted(b)) |
232 | return false; | |
233 | ||
234 | while (b->barriers > comp) { | |
7566e267 ZJS |
235 | struct pollfd pfd[2] = { |
236 | { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1], | |
237 | .events = POLLHUP }, | |
238 | { .fd = b->them, | |
239 | .events = POLLIN }}; | |
240 | uint64_t buf; | |
241 | int r; | |
279da1e3 DH |
242 | |
243 | r = poll(pfd, 2, -1); | |
7566e267 | 244 | if (r < 0 && IN_SET(errno, EAGAIN, EINTR)) |
279da1e3 DH |
245 | continue; |
246 | else if (r < 0) | |
247 | goto error; | |
248 | ||
249 | if (pfd[1].revents) { | |
7566e267 ZJS |
250 | ssize_t len; |
251 | ||
252 | /* events on @them signal new data for us */ | |
279da1e3 | 253 | len = read(b->them, &buf, sizeof(buf)); |
7566e267 | 254 | if (len < 0 && IN_SET(errno, EAGAIN, EINTR)) |
279da1e3 DH |
255 | continue; |
256 | ||
257 | if (len != sizeof(buf)) | |
258 | goto error; | |
7566e267 | 259 | } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL)) |
279da1e3 DH |
260 | /* POLLHUP on the pipe tells us the other side exited. |
261 | * We treat this as implicit abortion. But we only | |
262 | * handle it if there's no event on the eventfd. This | |
263 | * guarantees that exit-abortions do not overwrite real | |
264 | * barriers. */ | |
265 | buf = BARRIER_ABORTION; | |
53290ee3 DH |
266 | else |
267 | continue; | |
279da1e3 DH |
268 | |
269 | /* lock if they aborted */ | |
270 | if (buf >= (uint64_t)BARRIER_ABORTION) { | |
271 | if (barrier_i_aborted(b)) | |
272 | b->barriers = BARRIER_WE_ABORTED; | |
273 | else | |
274 | b->barriers = BARRIER_THEY_ABORTED; | |
7566e267 | 275 | } else if (!barrier_is_aborted(b)) |
279da1e3 | 276 | b->barriers -= buf; |
279da1e3 DH |
277 | } |
278 | ||
279 | return !barrier_they_aborted(b); | |
280 | ||
281 | error: | |
282 | /* If there is an unexpected error, we have to make this fatal. There | |
283 | * is no way we can recover from sync-errors. Therefore, we close the | |
284 | * pipe-ends and treat this as abortion. The other end will notice the | |
285 | * pipe-close and treat it as abortion, too. */ | |
286 | ||
7566e267 | 287 | safe_close_pair(b->pipe); |
279da1e3 DH |
288 | b->barriers = BARRIER_WE_ABORTED; |
289 | return false; | |
290 | } | |
291 | ||
292 | /** | |
293 | * barrier_place() - Place a new barrier | |
294 | * @b: barrier object | |
295 | * | |
296 | * This places a new barrier on the barrier object. If either side already | |
297 | * aborted, this is a no-op and returns "false". Otherwise, the barrier is | |
298 | * placed and this returns "true". | |
299 | * | |
300 | * Returns: true if barrier was placed, false if either side aborted. | |
301 | */ | |
302 | bool barrier_place(Barrier *b) { | |
303 | assert(b); | |
304 | ||
305 | if (barrier_is_aborted(b)) | |
306 | return false; | |
307 | ||
308 | barrier_write(b, BARRIER_SINGLE); | |
309 | return true; | |
310 | } | |
311 | ||
312 | /** | |
313 | * barrier_abort() - Abort the synchronization | |
314 | * @b: barrier object to abort | |
315 | * | |
316 | * This aborts the barrier-synchronization. If barrier_abort() was already | |
317 | * called on this side, this is a no-op. Otherwise, the barrier is put into the | |
318 | * ABORT-state and will stay there. The other side is notified about the | |
319 | * abortion. Any following attempt to place normal barriers or to wait on normal | |
320 | * barriers will return immediately as "false". | |
321 | * | |
322 | * You can wait for the other side to call barrier_abort(), too. Use | |
323 | * barrier_wait_abortion() for that. | |
324 | * | |
325 | * Returns: false if the other side already aborted, true otherwise. | |
326 | */ | |
327 | bool barrier_abort(Barrier *b) { | |
328 | assert(b); | |
329 | ||
330 | barrier_write(b, BARRIER_ABORTION); | |
331 | return !barrier_they_aborted(b); | |
332 | } | |
333 | ||
334 | /** | |
335 | * barrier_wait_next() - Wait for the next barrier of the other side | |
336 | * @b: barrier to operate on | |
337 | * | |
338 | * This waits until the other side places its next barrier. This is independent | |
339 | * of any barrier-links and just waits for any next barrier of the other side. | |
340 | * | |
341 | * If either side aborted, this returns false. | |
342 | * | |
343 | * Returns: false if either side aborted, true otherwise. | |
344 | */ | |
345 | bool barrier_wait_next(Barrier *b) { | |
346 | assert(b); | |
347 | ||
348 | if (barrier_is_aborted(b)) | |
349 | return false; | |
350 | ||
351 | barrier_read(b, b->barriers - 1); | |
352 | return !barrier_is_aborted(b); | |
353 | } | |
354 | ||
355 | /** | |
356 | * barrier_wait_abortion() - Wait for the other side to abort | |
357 | * @b: barrier to operate on | |
358 | * | |
359 | * This waits until the other side called barrier_abort(). This can be called | |
360 | * regardless whether the local side already called barrier_abort() or not. | |
361 | * | |
362 | * If the other side has already aborted, this returns immediately. | |
363 | * | |
364 | * Returns: false if the local side aborted, true otherwise. | |
365 | */ | |
366 | bool barrier_wait_abortion(Barrier *b) { | |
367 | assert(b); | |
368 | ||
369 | barrier_read(b, BARRIER_THEY_ABORTED); | |
370 | return !barrier_i_aborted(b); | |
371 | } | |
372 | ||
373 | /** | |
374 | * barrier_sync_next() - Wait for the other side to place a next linked barrier | |
375 | * @b: barrier to operate on | |
376 | * | |
377 | * This is like barrier_wait_next() and waits for the other side to call | |
378 | * barrier_place(). However, this only waits for linked barriers. That means, if | |
379 | * the other side already placed more barriers than (or as much as) we did, this | |
380 | * returns immediately instead of waiting. | |
381 | * | |
382 | * If either side aborted, this returns false. | |
383 | * | |
384 | * Returns: false if either side aborted, true otherwise. | |
385 | */ | |
386 | bool barrier_sync_next(Barrier *b) { | |
387 | assert(b); | |
388 | ||
389 | if (barrier_is_aborted(b)) | |
390 | return false; | |
391 | ||
392 | barrier_read(b, MAX((int64_t)0, b->barriers - 1)); | |
393 | return !barrier_is_aborted(b); | |
394 | } | |
395 | ||
396 | /** | |
397 | * barrier_sync() - Wait for the other side to place as many barriers as we did | |
398 | * @b: barrier to operate on | |
399 | * | |
400 | * This is like barrier_sync_next() but waits for the other side to call | |
401 | * barrier_place() as often as we did (in total). If they already placed as much | |
402 | * as we did (or more), this returns immediately instead of waiting. | |
403 | * | |
404 | * If either side aborted, this returns false. | |
405 | * | |
406 | * Returns: false if either side aborted, true otherwise. | |
407 | */ | |
408 | bool barrier_sync(Barrier *b) { | |
409 | assert(b); | |
410 | ||
411 | if (barrier_is_aborted(b)) | |
412 | return false; | |
413 | ||
414 | barrier_read(b, 0); | |
415 | return !barrier_is_aborted(b); | |
416 | } |