src/shared/barrier.c

   1 /* SPDX-License-Identifier: LGPL-2.1-or-later */
   2
   3 #include <errno.h>
   4 #include <fcntl.h>
   5 #include <poll.h>
   6 #include <stdbool.h>
   7 #include <stdint.h>
   8 #include <stdlib.h>
   9 #include <sys/eventfd.h>
  10 #include <sys/types.h>
  11 #include <unistd.h>
  12
  13 #include "barrier.h"
  14 #include "fd-util.h"
  15 #include "macro.h"
  16
  17 /**
  18  * Barriers
  19  * This barrier implementation provides a simple synchronization method based
  20  * on file-descriptors that can safely be used between threads and processes. A
  21  * barrier object contains 2 shared counters based on eventfd. Both processes
  22  * can now place barriers and wait for the other end to reach a random or
  23  * specific barrier.
 * Barriers are numbered, so you can either wait for the other end to reach any
 * barrier or the last barrier that you placed. This way, you can use barriers
 * for one-way *and* full synchronization. Note that even though barriers are
 * numbered, these numbers are internal and recycled once both sides reached the
 * same barrier (implemented as a simple signed counter). It is thus not
 * possible to address barriers by their ID.
  30  *
  31  * Barrier-API: Both ends can place as many barriers via barrier_place() as
  32  * they want and each pair of barriers on both sides will be implicitly linked.
  33  * Each side can use the barrier_wait/sync_*() family of calls to wait for the
  34  * other side to place a specific barrier. barrier_wait_next() waits until the
  35  * other side calls barrier_place(). No links between the barriers are
  36  * considered and this simply serves as most basic asynchronous barrier.
  37  * barrier_sync_next() is like barrier_wait_next() and waits for the other side
  38  * to place their next barrier via barrier_place(). However, it only waits for
  39  * barriers that are linked to a barrier we already placed. If the other side
  40  * already placed more barriers than we did, barrier_sync_next() returns
  41  * immediately.
  42  * barrier_sync() extends barrier_sync_next() and waits until the other end
  43  * placed as many barriers via barrier_place() as we did. If they already placed
  44  * as many as we did (or more), it returns immediately.
  45  *
 * In addition to basic barriers, an abortion event is available.
 * barrier_abort() places an abortion event that cannot be undone. An abortion
 * immediately cancels all placed barriers and replaces them. Any running and
 * following wait/sync call besides barrier_wait_abortion() will immediately
 * return false on both sides (otherwise, they always return true).
 * barrier_abort() can be called multiple times on both ends and will be a
 * no-op if already called on this side.
 * barrier_wait_abortion() can be used to wait for the other side to call
 * barrier_abort() and is the only wait/sync call that does not return
 * immediately if we aborted ourselves. It only returns once the other side
 * called barrier_abort().
  57  *
  58  * Barriers can be used for in-process and inter-process synchronization.
  59  * However, for in-process synchronization you could just use mutexes.
  60  * Therefore, main target is IPC and we require both sides to *not* share the FD
  61  * table. If that's given, barriers provide target tracking: If the remote side
  62  * exit()s, an abortion event is implicitly queued on the other side. This way,
  63  * a sync/wait call will be woken up if the remote side crashed or exited
  64  * unexpectedly. However, note that these abortion events are only queued if the
  65  * barrier-queue has been drained. Therefore, it is safe to place a barrier and
  66  * exit. The other side can safely wait on the barrier even though the exit
  67  * queued an abortion event. Usually, the abortion event would overwrite the
  68  * barrier, however, that's not true for exit-abortion events. Those are only
  69  * queued if the barrier-queue is drained (thus, the receiving side has placed
  70  * more barriers than the remote side).
  71  */
  72
  73 /**
  74  * barrier_create() - Initialize a barrier object
  75  * @obj: barrier to initialize
  76  *
  77  * This initializes a barrier object. The caller is responsible of allocating
  78  * the memory and keeping it valid. The memory does not have to be zeroed
  79  * beforehand.
  80  * Two eventfd objects are allocated for each barrier. If allocation fails, an
  81  * error is returned.
  82  *
  83  * If this function fails, the barrier is reset to an invalid state so it is
  84  * safe to call barrier_destroy() on the object regardless whether the
  85  * initialization succeeded or not.
  86  *
  87  * The caller is responsible to destroy the object via barrier_destroy() before
  88  * releasing the underlying memory.
  89  *
  90  * Returns: 0 on success, negative error code on failure.
  91  */
  92 int barrier_create(Barrier *b) {
  93         _cleanup_(barrier_destroyp) Barrier *staging = b;
  94         int r;
  95
  96         assert(b);
  97
  98         b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
  99         if (b->me < 0)
 100                 return -errno;
 101
 102         b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 103         if (b->them < 0)
 104                 return -errno;
 105
 106         r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
 107         if (r < 0)
 108                 return -errno;
 109
 110         staging = NULL;
 111         return 0;
 112 }
 113
 114 /**
 115  * barrier_destroy() - Destroy a barrier object
 116  * @b: barrier to destroy or NULL
 117  *
 118  * This destroys a barrier object that has previously been passed to
 119  * barrier_create(). The object is released and reset to invalid
 120  * state. Therefore, it is safe to call barrier_destroy() multiple
 121  * times or even if barrier_create() failed. However, barrier must be
 122  * always initialized with BARRIER_NULL.
 123  *
 124  * If @b is NULL, this is a no-op.
 125  */
 126 void barrier_destroy(Barrier *b) {
 127         if (!b)
 128                 return;
 129
 130         b->me = safe_close(b->me);
 131         b->them = safe_close(b->them);
 132         safe_close_pair(b->pipe);
 133         b->barriers = 0;
 134 }
 135
 136 /**
 137  * barrier_set_role() - Set the local role of the barrier
 138  * @b: barrier to operate on
 139  * @role: role to set on the barrier
 140  *
 141  * This sets the roles on a barrier object. This is needed to know
 142  * which side of the barrier you're on. Usually, the parent creates
 143  * the barrier via barrier_create() and then calls fork() or clone().
 144  * Therefore, the FDs are duplicated and the child retains the same
 145  * barrier object.
 146  *
 147  * Both sides need to call barrier_set_role() after fork() or clone()
 148  * are done. If this is not done, barriers will not work correctly.
 149  *
 150  * Note that barriers could be supported without fork() or clone(). However,
 151  * this is currently not needed so it hasn't been implemented.
 152  */
 153 void barrier_set_role(Barrier *b, unsigned role) {
 154         assert(b);
 155         assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
 156         /* make sure this is only called once */
 157         assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
 158
 159         if (role == BARRIER_PARENT)
 160                 b->pipe[1] = safe_close(b->pipe[1]);
 161         else {
 162                 b->pipe[0] = safe_close(b->pipe[0]);
 163
 164                 /* swap me/them for children */
 165                 SWAP_TWO(b->me, b->them);
 166         }
 167 }
 168
 169 /* places barrier; returns false if we aborted, otherwise true */
 170 static bool barrier_write(Barrier *b, uint64_t buf) {
 171         ssize_t len;
 172
 173         /* prevent new sync-points if we already aborted */
 174         if (barrier_i_aborted(b))
 175                 return false;
 176
 177         assert(b->me >= 0);
 178         do {
 179                 len = write(b->me, &buf, sizeof(buf));
 180         } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
 181
 182         if (len != sizeof(buf))
 183                 goto error;
 184
 185         /* lock if we aborted */
 186         if (buf >= (uint64_t)BARRIER_ABORTION) {
 187                 if (barrier_they_aborted(b))
 188                         b->barriers = BARRIER_WE_ABORTED;
 189                 else
 190                         b->barriers = BARRIER_I_ABORTED;
 191         } else if (!barrier_is_aborted(b))
 192                 b->barriers += buf;
 193
 194         return !barrier_i_aborted(b);
 195
 196 error:
 197         /* If there is an unexpected error, we have to make this fatal. There
 198          * is no way we can recover from sync-errors. Therefore, we close the
 199          * pipe-ends and treat this as abortion. The other end will notice the
 200          * pipe-close and treat it as abortion, too. */
 201
 202         safe_close_pair(b->pipe);
 203         b->barriers = BARRIER_WE_ABORTED;
 204         return false;
 205 }
 206
 207 /* waits for barriers; returns false if they aborted, otherwise true */
 208 static bool barrier_read(Barrier *b, int64_t comp) {
 209         if (barrier_they_aborted(b))
 210                 return false;
 211
 212         while (b->barriers > comp) {
 213                 struct pollfd pfd[2] = {
 214                         { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
 215                           .events = POLLHUP },
 216                         { .fd = b->them,
 217                           .events = POLLIN }};
 218                 uint64_t buf;
 219                 int r;
 220
 221                 r = poll(pfd, ELEMENTSOF(pfd), -1);
 222                 if (r < 0) {
 223                         if (IN_SET(errno, EAGAIN, EINTR))
 224                                 continue;
 225                         goto error;
 226                 }
 227                 if (pfd[0].revents & POLLNVAL ||
 228                     pfd[1].revents & POLLNVAL)
 229                         goto error;
 230
 231                 if (pfd[1].revents) {
 232                         ssize_t len;
 233
 234                         /* events on @them signal new data for us */
 235                         len = read(b->them, &buf, sizeof(buf));
 236                         if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
 237                                 continue;
 238
 239                         if (len != sizeof(buf))
 240                                 goto error;
 241                 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
 242                         /* POLLHUP on the pipe tells us the other side exited.
 243                          * We treat this as implicit abortion. But we only
 244                          * handle it if there's no event on the eventfd. This
 245                          * guarantees that exit-abortions do not overwrite real
 246                          * barriers. */
 247                         buf = BARRIER_ABORTION;
 248                 else
 249                         continue;
 250
 251                 /* lock if they aborted */
 252                 if (buf >= (uint64_t)BARRIER_ABORTION) {
 253                         if (barrier_i_aborted(b))
 254                                 b->barriers = BARRIER_WE_ABORTED;
 255                         else
 256                                 b->barriers = BARRIER_THEY_ABORTED;
 257                 } else if (!barrier_is_aborted(b))
 258                         b->barriers -= buf;
 259         }
 260
 261         return !barrier_they_aborted(b);
 262
 263 error:
 264         /* If there is an unexpected error, we have to make this fatal. There
 265          * is no way we can recover from sync-errors. Therefore, we close the
 266          * pipe-ends and treat this as abortion. The other end will notice the
 267          * pipe-close and treat it as abortion, too. */
 268
 269         safe_close_pair(b->pipe);
 270         b->barriers = BARRIER_WE_ABORTED;
 271         return false;
 272 }
 273
 274 /**
 275  * barrier_place() - Place a new barrier
 276  * @b: barrier object
 277  *
 278  * This places a new barrier on the barrier object. If either side already
 279  * aborted, this is a no-op and returns "false". Otherwise, the barrier is
 280  * placed and this returns "true".
 281  *
 282  * Returns: true if barrier was placed, false if either side aborted.
 283  */
 284 bool barrier_place(Barrier *b) {
 285         assert(b);
 286
 287         if (barrier_is_aborted(b))
 288                 return false;
 289
 290         barrier_write(b, BARRIER_SINGLE);
 291         return true;
 292 }
 293
 294 /**
 295  * barrier_abort() - Abort the synchronization
 296  * @b: barrier object to abort
 297  *
 298  * This aborts the barrier-synchronization. If barrier_abort() was already
 299  * called on this side, this is a no-op. Otherwise, the barrier is put into the
 300  * ABORT-state and will stay there. The other side is notified about the
 301  * abortion. Any following attempt to place normal barriers or to wait on normal
 302  * barriers will return immediately as "false".
 303  *
 304  * You can wait for the other side to call barrier_abort(), too. Use
 305  * barrier_wait_abortion() for that.
 306  *
 307  * Returns: false if the other side already aborted, true otherwise.
 308  */
 309 bool barrier_abort(Barrier *b) {
 310         assert(b);
 311
 312         barrier_write(b, BARRIER_ABORTION);
 313         return !barrier_they_aborted(b);
 314 }
 315
 316 /**
 317  * barrier_wait_next() - Wait for the next barrier of the other side
 318  * @b: barrier to operate on
 319  *
 320  * This waits until the other side places its next barrier. This is independent
 321  * of any barrier-links and just waits for any next barrier of the other side.
 322  *
 323  * If either side aborted, this returns false.
 324  *
 325  * Returns: false if either side aborted, true otherwise.
 326  */
 327 bool barrier_wait_next(Barrier *b) {
 328         assert(b);
 329
 330         if (barrier_is_aborted(b))
 331                 return false;
 332
 333         barrier_read(b, b->barriers - 1);
 334         return !barrier_is_aborted(b);
 335 }
 336
 337 /**
 338  * barrier_wait_abortion() - Wait for the other side to abort
 339  * @b: barrier to operate on
 340  *
 341  * This waits until the other side called barrier_abort(). This can be called
 342  * regardless whether the local side already called barrier_abort() or not.
 343  *
 344  * If the other side has already aborted, this returns immediately.
 345  *
 346  * Returns: false if the local side aborted, true otherwise.
 347  */
 348 bool barrier_wait_abortion(Barrier *b) {
 349         assert(b);
 350
 351         barrier_read(b, BARRIER_THEY_ABORTED);
 352         return !barrier_i_aborted(b);
 353 }
 354
 355 /**
 356  * barrier_sync_next() - Wait for the other side to place a next linked barrier
 357  * @b: barrier to operate on
 358  *
 359  * This is like barrier_wait_next() and waits for the other side to call
 360  * barrier_place(). However, this only waits for linked barriers. That means, if
 361  * the other side already placed more barriers than (or as much as) we did, this
 362  * returns immediately instead of waiting.
 363  *
 364  * If either side aborted, this returns false.
 365  *
 366  * Returns: false if either side aborted, true otherwise.
 367  */
 368 bool barrier_sync_next(Barrier *b) {
 369         assert(b);
 370
 371         if (barrier_is_aborted(b))
 372                 return false;
 373
 374         barrier_read(b, MAX((int64_t)0, b->barriers - 1));
 375         return !barrier_is_aborted(b);
 376 }
 377
 378 /**
 379  * barrier_sync() - Wait for the other side to place as many barriers as we did
 380  * @b: barrier to operate on
 381  *
 382  * This is like barrier_sync_next() but waits for the other side to call
 383  * barrier_place() as often as we did (in total). If they already placed as much
 384  * as we did (or more), this returns immediately instead of waiting.
 385  *
 386  * If either side aborted, this returns false.
 387  *
 388  * Returns: false if either side aborted, true otherwise.
 389  */
 390 bool barrier_sync(Barrier *b) {
 391         assert(b);
 392
 393         if (barrier_is_aborted(b))
 394                 return false;
 395
 396         barrier_read(b, 0);
 397         return !barrier_is_aborted(b);
 398 }