src/basic/barrier.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
   4 ***/
   5
   6 #include <errno.h>
   7 #include <fcntl.h>
   8 #include <poll.h>
   9 #include <stdbool.h>
  10 #include <stdint.h>
  11 #include <stdlib.h>
  12 #include <sys/eventfd.h>
  13 #include <sys/types.h>
  14 #include <unistd.h>
  15
  16 #include "barrier.h"
  17 #include "fd-util.h"
  18 #include "macro.h"
  19
  20 /**
  21  * Barriers
  22  * This barrier implementation provides a simple synchronization method based
  23  * on file-descriptors that can safely be used between threads and processes. A
  24  * barrier object contains 2 shared counters based on eventfd. Both processes
  25  * can now place barriers and wait for the other end to reach a random or
  26  * specific barrier.
  27  * Barriers are numbered, so you can either wait for the other end to reach any
  28  * barrier or the last barrier that you placed. This way, you can use barriers
  29  * for one-way *and* full synchronization. Note that even though barriers are
  30  * numbered, these numbers are internal and recycled once both sides reached the
  31  * same barrier (implemented as a simple signed counter). It is thus not
  32  * possible to address barriers by their ID.
  33  *
  34  * Barrier-API: Both ends can place as many barriers via barrier_place() as
  35  * they want and each pair of barriers on both sides will be implicitly linked.
  36  * Each side can use the barrier_wait/sync_*() family of calls to wait for the
  37  * other side to place a specific barrier. barrier_wait_next() waits until the
  38  * other side calls barrier_place(). No links between the barriers are
  39  * considered and this simply serves as most basic asynchronous barrier.
  40  * barrier_sync_next() is like barrier_wait_next() and waits for the other side
  41  * to place their next barrier via barrier_place(). However, it only waits for
  42  * barriers that are linked to a barrier we already placed. If the other side
  43  * already placed more barriers than we did, barrier_sync_next() returns
  44  * immediately.
  45  * barrier_sync() extends barrier_sync_next() and waits until the other end
  46  * placed as many barriers via barrier_place() as we did. If they already placed
  47  * as many as we did (or more), it returns immediately.
  48  *
  49  * In addition to basic barriers, an abortion event is available.
  50  * barrier_abort() places an abortion event that cannot be undone. An abortion
  51  * immediately cancels all placed barriers and replaces them. Any running and
  52  * following wait/sync call besides barrier_wait_abortion() will immediately
  53  * return false on both sides (otherwise, they always return true).
  54  * barrier_abort() can be called multiple times on both ends and will be a
  55  * no-op if already called on this side.
  56  * barrier_wait_abortion() can be used to wait for the other side to call
  57  * barrier_abort() and is the only wait/sync call that does not return
  58  * immediately if we aborted ourselves. It only returns once the other side
  59  * called barrier_abort().
  60  *
  61  * Barriers can be used for in-process and inter-process synchronization.
  62  * However, for in-process synchronization you could just use mutexes.
  63  * Therefore, main target is IPC and we require both sides to *not* share the FD
  64  * table. If that's given, barriers provide target tracking: If the remote side
  65  * exit()s, an abortion event is implicitly queued on the other side. This way,
  66  * a sync/wait call will be woken up if the remote side crashed or exited
  67  * unexpectedly. However, note that these abortion events are only queued if the
  68  * barrier-queue has been drained. Therefore, it is safe to place a barrier and
  69  * exit. The other side can safely wait on the barrier even though the exit
  70  * queued an abortion event. Usually, the abortion event would overwrite the
  71  * barrier, however, that's not true for exit-abortion events. Those are only
  72  * queued if the barrier-queue is drained (thus, the receiving side has placed
  73  * more barriers than the remote side).
  74  */
  75
  76 /**
  77  * barrier_create() - Initialize a barrier object
  78  * @obj: barrier to initialize
  79  *
  80  * This initializes a barrier object. The caller is responsible of allocating
  81  * the memory and keeping it valid. The memory does not have to be zeroed
  82  * beforehand.
  83  * Two eventfd objects are allocated for each barrier. If allocation fails, an
  84  * error is returned.
  85  *
  86  * If this function fails, the barrier is reset to an invalid state so it is
  87  * safe to call barrier_destroy() on the object regardless whether the
  88  * initialization succeeded or not.
  89  *
  90  * The caller is responsible to destroy the object via barrier_destroy() before
  91  * releasing the underlying memory.
  92  *
  93  * Returns: 0 on success, negative error code on failure.
  94  */
  95 int barrier_create(Barrier *b) {
  96         _cleanup_(barrier_destroyp) Barrier *staging = b;
  97         int r;
  98
  99         assert(b);
 100
 101         b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 102         if (b->me < 0)
 103                 return -errno;
 104
 105         b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 106         if (b->them < 0)
 107                 return -errno;
 108
 109         r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
 110         if (r < 0)
 111                 return -errno;
 112
 113         staging = NULL;
 114         return 0;
 115 }
 116
 117 /**
 118  * barrier_destroy() - Destroy a barrier object
 119  * @b: barrier to destroy or NULL
 120  *
 121  * This destroys a barrier object that has previously been passed to
 122  * barrier_create(). The object is released and reset to invalid
 123  * state. Therefore, it is safe to call barrier_destroy() multiple
 124  * times or even if barrier_create() failed. However, barrier must be
 125  * always initialized with BARRIER_NULL.
 126  *
 127  * If @b is NULL, this is a no-op.
 128  */
 129 void barrier_destroy(Barrier *b) {
 130         if (!b)
 131                 return;
 132
 133         b->me = safe_close(b->me);
 134         b->them = safe_close(b->them);
 135         safe_close_pair(b->pipe);
 136         b->barriers = 0;
 137 }
 138
 139 /**
 140  * barrier_set_role() - Set the local role of the barrier
 141  * @b: barrier to operate on
 142  * @role: role to set on the barrier
 143  *
 144  * This sets the roles on a barrier object. This is needed to know
 145  * which side of the barrier you're on. Usually, the parent creates
 146  * the barrier via barrier_create() and then calls fork() or clone().
 147  * Therefore, the FDs are duplicated and the child retains the same
 148  * barrier object.
 149  *
 150  * Both sides need to call barrier_set_role() after fork() or clone()
 151  * are done. If this is not done, barriers will not work correctly.
 152  *
 153  * Note that barriers could be supported without fork() or clone(). However,
 154  * this is currently not needed so it hasn't been implemented.
 155  */
 156 void barrier_set_role(Barrier *b, unsigned int role) {
 157         int fd;
 158
 159         assert(b);
 160         assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
 161         /* make sure this is only called once */
 162         assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
 163
 164         if (role == BARRIER_PARENT)
 165                 b->pipe[1] = safe_close(b->pipe[1]);
 166         else {
 167                 b->pipe[0] = safe_close(b->pipe[0]);
 168
 169                 /* swap me/them for children */
 170                 fd = b->me;
 171                 b->me = b->them;
 172                 b->them = fd;
 173         }
 174 }
 175
 176 /* places barrier; returns false if we aborted, otherwise true */
 177 static bool barrier_write(Barrier *b, uint64_t buf) {
 178         ssize_t len;
 179
 180         /* prevent new sync-points if we already aborted */
 181         if (barrier_i_aborted(b))
 182                 return false;
 183
 184         assert(b->me >= 0);
 185         do {
 186                 len = write(b->me, &buf, sizeof(buf));
 187         } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
 188
 189         if (len != sizeof(buf))
 190                 goto error;
 191
 192         /* lock if we aborted */
 193         if (buf >= (uint64_t)BARRIER_ABORTION) {
 194                 if (barrier_they_aborted(b))
 195                         b->barriers = BARRIER_WE_ABORTED;
 196                 else
 197                         b->barriers = BARRIER_I_ABORTED;
 198         } else if (!barrier_is_aborted(b))
 199                 b->barriers += buf;
 200
 201         return !barrier_i_aborted(b);
 202
 203 error:
 204         /* If there is an unexpected error, we have to make this fatal. There
 205          * is no way we can recover from sync-errors. Therefore, we close the
 206          * pipe-ends and treat this as abortion. The other end will notice the
 207          * pipe-close and treat it as abortion, too. */
 208
 209         safe_close_pair(b->pipe);
 210         b->barriers = BARRIER_WE_ABORTED;
 211         return false;
 212 }
 213
 214 /* waits for barriers; returns false if they aborted, otherwise true */
 215 static bool barrier_read(Barrier *b, int64_t comp) {
 216         if (barrier_they_aborted(b))
 217                 return false;
 218
 219         while (b->barriers > comp) {
 220                 struct pollfd pfd[2] = {
 221                         { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
 222                           .events = POLLHUP },
 223                         { .fd = b->them,
 224                           .events = POLLIN }};
 225                 uint64_t buf;
 226                 int r;
 227
 228                 r = poll(pfd, 2, -1);
 229                 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
 230                         continue;
 231                 else if (r < 0)
 232                         goto error;
 233
 234                 if (pfd[1].revents) {
 235                         ssize_t len;
 236
 237                         /* events on @them signal new data for us */
 238                         len = read(b->them, &buf, sizeof(buf));
 239                         if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
 240                                 continue;
 241
 242                         if (len != sizeof(buf))
 243                                 goto error;
 244                 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
 245                         /* POLLHUP on the pipe tells us the other side exited.
 246                          * We treat this as implicit abortion. But we only
 247                          * handle it if there's no event on the eventfd. This
 248                          * guarantees that exit-abortions do not overwrite real
 249                          * barriers. */
 250                         buf = BARRIER_ABORTION;
 251                 else
 252                         continue;
 253
 254                 /* lock if they aborted */
 255                 if (buf >= (uint64_t)BARRIER_ABORTION) {
 256                         if (barrier_i_aborted(b))
 257                                 b->barriers = BARRIER_WE_ABORTED;
 258                         else
 259                                 b->barriers = BARRIER_THEY_ABORTED;
 260                 } else if (!barrier_is_aborted(b))
 261                         b->barriers -= buf;
 262         }
 263
 264         return !barrier_they_aborted(b);
 265
 266 error:
 267         /* If there is an unexpected error, we have to make this fatal. There
 268          * is no way we can recover from sync-errors. Therefore, we close the
 269          * pipe-ends and treat this as abortion. The other end will notice the
 270          * pipe-close and treat it as abortion, too. */
 271
 272         safe_close_pair(b->pipe);
 273         b->barriers = BARRIER_WE_ABORTED;
 274         return false;
 275 }
 276
 277 /**
 278  * barrier_place() - Place a new barrier
 279  * @b: barrier object
 280  *
 281  * This places a new barrier on the barrier object. If either side already
 282  * aborted, this is a no-op and returns "false". Otherwise, the barrier is
 283  * placed and this returns "true".
 284  *
 285  * Returns: true if barrier was placed, false if either side aborted.
 286  */
 287 bool barrier_place(Barrier *b) {
 288         assert(b);
 289
 290         if (barrier_is_aborted(b))
 291                 return false;
 292
 293         barrier_write(b, BARRIER_SINGLE);
 294         return true;
 295 }
 296
 297 /**
 298  * barrier_abort() - Abort the synchronization
 299  * @b: barrier object to abort
 300  *
 301  * This aborts the barrier-synchronization. If barrier_abort() was already
 302  * called on this side, this is a no-op. Otherwise, the barrier is put into the
 303  * ABORT-state and will stay there. The other side is notified about the
 304  * abortion. Any following attempt to place normal barriers or to wait on normal
 305  * barriers will return immediately as "false".
 306  *
 307  * You can wait for the other side to call barrier_abort(), too. Use
 308  * barrier_wait_abortion() for that.
 309  *
 310  * Returns: false if the other side already aborted, true otherwise.
 311  */
 312 bool barrier_abort(Barrier *b) {
 313         assert(b);
 314
 315         barrier_write(b, BARRIER_ABORTION);
 316         return !barrier_they_aborted(b);
 317 }
 318
 319 /**
 320  * barrier_wait_next() - Wait for the next barrier of the other side
 321  * @b: barrier to operate on
 322  *
 323  * This waits until the other side places its next barrier. This is independent
 324  * of any barrier-links and just waits for any next barrier of the other side.
 325  *
 326  * If either side aborted, this returns false.
 327  *
 328  * Returns: false if either side aborted, true otherwise.
 329  */
 330 bool barrier_wait_next(Barrier *b) {
 331         assert(b);
 332
 333         if (barrier_is_aborted(b))
 334                 return false;
 335
 336         barrier_read(b, b->barriers - 1);
 337         return !barrier_is_aborted(b);
 338 }
 339
 340 /**
 341  * barrier_wait_abortion() - Wait for the other side to abort
 342  * @b: barrier to operate on
 343  *
 344  * This waits until the other side called barrier_abort(). This can be called
 345  * regardless whether the local side already called barrier_abort() or not.
 346  *
 347  * If the other side has already aborted, this returns immediately.
 348  *
 349  * Returns: false if the local side aborted, true otherwise.
 350  */
 351 bool barrier_wait_abortion(Barrier *b) {
 352         assert(b);
 353
 354         barrier_read(b, BARRIER_THEY_ABORTED);
 355         return !barrier_i_aborted(b);
 356 }
 357
 358 /**
 359  * barrier_sync_next() - Wait for the other side to place a next linked barrier
 360  * @b: barrier to operate on
 361  *
 362  * This is like barrier_wait_next() and waits for the other side to call
 363  * barrier_place(). However, this only waits for linked barriers. That means, if
 364  * the other side already placed more barriers than (or as much as) we did, this
 365  * returns immediately instead of waiting.
 366  *
 367  * If either side aborted, this returns false.
 368  *
 369  * Returns: false if either side aborted, true otherwise.
 370  */
 371 bool barrier_sync_next(Barrier *b) {
 372         assert(b);
 373
 374         if (barrier_is_aborted(b))
 375                 return false;
 376
 377         barrier_read(b, MAX((int64_t)0, b->barriers - 1));
 378         return !barrier_is_aborted(b);
 379 }
 380
 381 /**
 382  * barrier_sync() - Wait for the other side to place as many barriers as we did
 383  * @b: barrier to operate on
 384  *
 385  * This is like barrier_sync_next() but waits for the other side to call
 386  * barrier_place() as often as we did (in total). If they already placed as much
 387  * as we did (or more), this returns immediately instead of waiting.
 388  *
 389  * If either side aborted, this returns false.
 390  *
 391  * Returns: false if either side aborted, true otherwise.
 392  */
 393 bool barrier_sync(Barrier *b) {
 394         assert(b);
 395
 396         if (barrier_is_aborted(b))
 397                 return false;
 398
 399         barrier_read(b, 0);
 400         return !barrier_is_aborted(b);
 401 }