src/basic/barrier.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
   6 ***/
   7
   8 #include <errno.h>
   9 #include <fcntl.h>
  10 #include <poll.h>
  11 #include <stdbool.h>
  12 #include <stdint.h>
  13 #include <stdlib.h>
  14 #include <sys/eventfd.h>
  15 #include <sys/types.h>
  16 #include <unistd.h>
  17
  18 #include "barrier.h"
  19 #include "fd-util.h"
  20 #include "macro.h"
  21
  22 /**
  23  * Barriers
  24  * This barrier implementation provides a simple synchronization method based
  25  * on file-descriptors that can safely be used between threads and processes. A
  26  * barrier object contains 2 shared counters based on eventfd. Both processes
  27  * can now place barriers and wait for the other end to reach a random or
  28  * specific barrier.
  29  * Barriers are numbered, so you can either wait for the other end to reach any
  30  * barrier or the last barrier that you placed. This way, you can use barriers
  31  * for one-way *and* full synchronization. Note that even though barriers are
  32  * numbered, these numbers are internal and recycled once both sides reached the
  33  * same barrier (implemented as a simple signed counter). It is thus not
  34  * possible to address barriers by their ID.
  35  *
  36  * Barrier-API: Both ends can place as many barriers via barrier_place() as
  37  * they want and each pair of barriers on both sides will be implicitly linked.
  38  * Each side can use the barrier_wait/sync_*() family of calls to wait for the
  39  * other side to place a specific barrier. barrier_wait_next() waits until the
  40  * other side calls barrier_place(). No links between the barriers are
  41  * considered and this simply serves as most basic asynchronous barrier.
  42  * barrier_sync_next() is like barrier_wait_next() and waits for the other side
  43  * to place their next barrier via barrier_place(). However, it only waits for
  44  * barriers that are linked to a barrier we already placed. If the other side
  45  * already placed more barriers than we did, barrier_sync_next() returns
  46  * immediately.
  47  * barrier_sync() extends barrier_sync_next() and waits until the other end
  48  * placed as many barriers via barrier_place() as we did. If they already placed
  49  * as many as we did (or more), it returns immediately.
  50  *
  51  * In addition to basic barriers, an abortion event is available.
  52  * barrier_abort() places an abortion event that cannot be undone. An abortion
  53  * immediately cancels all placed barriers and replaces them. Any running and
  54  * following wait/sync call besides barrier_wait_abortion() will immediately
  55  * return false on both sides (otherwise, they always return true).
  56  * barrier_abort() can be called multiple times on both ends and will be a
  57  * no-op if already called on this side.
  58  * barrier_wait_abortion() can be used to wait for the other side to call
  59  * barrier_abort() and is the only wait/sync call that does not return
  60  * immediately if we aborted ourselves. It only returns once the other side
  61  * called barrier_abort().
  62  *
  63  * Barriers can be used for in-process and inter-process synchronization.
  64  * However, for in-process synchronization you could just use mutexes.
  65  * Therefore, main target is IPC and we require both sides to *not* share the FD
  66  * table. If that's given, barriers provide target tracking: If the remote side
  67  * exit()s, an abortion event is implicitly queued on the other side. This way,
  68  * a sync/wait call will be woken up if the remote side crashed or exited
  69  * unexpectedly. However, note that these abortion events are only queued if the
  70  * barrier-queue has been drained. Therefore, it is safe to place a barrier and
  71  * exit. The other side can safely wait on the barrier even though the exit
  72  * queued an abortion event. Usually, the abortion event would overwrite the
  73  * barrier, however, that's not true for exit-abortion events. Those are only
  74  * queued if the barrier-queue is drained (thus, the receiving side has placed
  75  * more barriers than the remote side).
  76  */
  77
  78 /**
  79  * barrier_create() - Initialize a barrier object
  80  * @obj: barrier to initialize
  81  *
  82  * This initializes a barrier object. The caller is responsible of allocating
  83  * the memory and keeping it valid. The memory does not have to be zeroed
  84  * beforehand.
  85  * Two eventfd objects are allocated for each barrier. If allocation fails, an
  86  * error is returned.
  87  *
  88  * If this function fails, the barrier is reset to an invalid state so it is
  89  * safe to call barrier_destroy() on the object regardless whether the
  90  * initialization succeeded or not.
  91  *
  92  * The caller is responsible to destroy the object via barrier_destroy() before
  93  * releasing the underlying memory.
  94  *
  95  * Returns: 0 on success, negative error code on failure.
  96  */
  97 int barrier_create(Barrier *b) {
  98         _cleanup_(barrier_destroyp) Barrier *staging = b;
  99         int r;
 100
 101         assert(b);
 102
 103         b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 104         if (b->me < 0)
 105                 return -errno;
 106
 107         b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 108         if (b->them < 0)
 109                 return -errno;
 110
 111         r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
 112         if (r < 0)
 113                 return -errno;
 114
 115         staging = NULL;
 116         return 0;
 117 }
 118
 119 /**
 120  * barrier_destroy() - Destroy a barrier object
 121  * @b: barrier to destroy or NULL
 122  *
 123  * This destroys a barrier object that has previously been passed to
 124  * barrier_create(). The object is released and reset to invalid
 125  * state. Therefore, it is safe to call barrier_destroy() multiple
 126  * times or even if barrier_create() failed. However, barrier must be
 127  * always initialized with BARRIER_NULL.
 128  *
 129  * If @b is NULL, this is a no-op.
 130  */
 131 void barrier_destroy(Barrier *b) {
 132         if (!b)
 133                 return;
 134
 135         b->me = safe_close(b->me);
 136         b->them = safe_close(b->them);
 137         safe_close_pair(b->pipe);
 138         b->barriers = 0;
 139 }
 140
 141 /**
 142  * barrier_set_role() - Set the local role of the barrier
 143  * @b: barrier to operate on
 144  * @role: role to set on the barrier
 145  *
 146  * This sets the roles on a barrier object. This is needed to know
 147  * which side of the barrier you're on. Usually, the parent creates
 148  * the barrier via barrier_create() and then calls fork() or clone().
 149  * Therefore, the FDs are duplicated and the child retains the same
 150  * barrier object.
 151  *
 152  * Both sides need to call barrier_set_role() after fork() or clone()
 153  * are done. If this is not done, barriers will not work correctly.
 154  *
 155  * Note that barriers could be supported without fork() or clone(). However,
 156  * this is currently not needed so it hasn't been implemented.
 157  */
 158 void barrier_set_role(Barrier *b, unsigned int role) {
 159         int fd;
 160
 161         assert(b);
 162         assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
 163         /* make sure this is only called once */
 164         assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
 165
 166         if (role == BARRIER_PARENT)
 167                 b->pipe[1] = safe_close(b->pipe[1]);
 168         else {
 169                 b->pipe[0] = safe_close(b->pipe[0]);
 170
 171                 /* swap me/them for children */
 172                 fd = b->me;
 173                 b->me = b->them;
 174                 b->them = fd;
 175         }
 176 }
 177
 178 /* places barrier; returns false if we aborted, otherwise true */
 179 static bool barrier_write(Barrier *b, uint64_t buf) {
 180         ssize_t len;
 181
 182         /* prevent new sync-points if we already aborted */
 183         if (barrier_i_aborted(b))
 184                 return false;
 185
 186         assert(b->me >= 0);
 187         do {
 188                 len = write(b->me, &buf, sizeof(buf));
 189         } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
 190
 191         if (len != sizeof(buf))
 192                 goto error;
 193
 194         /* lock if we aborted */
 195         if (buf >= (uint64_t)BARRIER_ABORTION) {
 196                 if (barrier_they_aborted(b))
 197                         b->barriers = BARRIER_WE_ABORTED;
 198                 else
 199                         b->barriers = BARRIER_I_ABORTED;
 200         } else if (!barrier_is_aborted(b))
 201                 b->barriers += buf;
 202
 203         return !barrier_i_aborted(b);
 204
 205 error:
 206         /* If there is an unexpected error, we have to make this fatal. There
 207          * is no way we can recover from sync-errors. Therefore, we close the
 208          * pipe-ends and treat this as abortion. The other end will notice the
 209          * pipe-close and treat it as abortion, too. */
 210
 211         safe_close_pair(b->pipe);
 212         b->barriers = BARRIER_WE_ABORTED;
 213         return false;
 214 }
 215
 216 /* waits for barriers; returns false if they aborted, otherwise true */
 217 static bool barrier_read(Barrier *b, int64_t comp) {
 218         if (barrier_they_aborted(b))
 219                 return false;
 220
 221         while (b->barriers > comp) {
 222                 struct pollfd pfd[2] = {
 223                         { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
 224                           .events = POLLHUP },
 225                         { .fd = b->them,
 226                           .events = POLLIN }};
 227                 uint64_t buf;
 228                 int r;
 229
 230                 r = poll(pfd, 2, -1);
 231                 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
 232                         continue;
 233                 else if (r < 0)
 234                         goto error;
 235
 236                 if (pfd[1].revents) {
 237                         ssize_t len;
 238
 239                         /* events on @them signal new data for us */
 240                         len = read(b->them, &buf, sizeof(buf));
 241                         if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
 242                                 continue;
 243
 244                         if (len != sizeof(buf))
 245                                 goto error;
 246                 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
 247                         /* POLLHUP on the pipe tells us the other side exited.
 248                          * We treat this as implicit abortion. But we only
 249                          * handle it if there's no event on the eventfd. This
 250                          * guarantees that exit-abortions do not overwrite real
 251                          * barriers. */
 252                         buf = BARRIER_ABORTION;
 253                 else
 254                         continue;
 255
 256                 /* lock if they aborted */
 257                 if (buf >= (uint64_t)BARRIER_ABORTION) {
 258                         if (barrier_i_aborted(b))
 259                                 b->barriers = BARRIER_WE_ABORTED;
 260                         else
 261                                 b->barriers = BARRIER_THEY_ABORTED;
 262                 } else if (!barrier_is_aborted(b))
 263                         b->barriers -= buf;
 264         }
 265
 266         return !barrier_they_aborted(b);
 267
 268 error:
 269         /* If there is an unexpected error, we have to make this fatal. There
 270          * is no way we can recover from sync-errors. Therefore, we close the
 271          * pipe-ends and treat this as abortion. The other end will notice the
 272          * pipe-close and treat it as abortion, too. */
 273
 274         safe_close_pair(b->pipe);
 275         b->barriers = BARRIER_WE_ABORTED;
 276         return false;
 277 }
 278
 279 /**
 280  * barrier_place() - Place a new barrier
 281  * @b: barrier object
 282  *
 283  * This places a new barrier on the barrier object. If either side already
 284  * aborted, this is a no-op and returns "false". Otherwise, the barrier is
 285  * placed and this returns "true".
 286  *
 287  * Returns: true if barrier was placed, false if either side aborted.
 288  */
 289 bool barrier_place(Barrier *b) {
 290         assert(b);
 291
 292         if (barrier_is_aborted(b))
 293                 return false;
 294
 295         barrier_write(b, BARRIER_SINGLE);
 296         return true;
 297 }
 298
 299 /**
 300  * barrier_abort() - Abort the synchronization
 301  * @b: barrier object to abort
 302  *
 303  * This aborts the barrier-synchronization. If barrier_abort() was already
 304  * called on this side, this is a no-op. Otherwise, the barrier is put into the
 305  * ABORT-state and will stay there. The other side is notified about the
 306  * abortion. Any following attempt to place normal barriers or to wait on normal
 307  * barriers will return immediately as "false".
 308  *
 309  * You can wait for the other side to call barrier_abort(), too. Use
 310  * barrier_wait_abortion() for that.
 311  *
 312  * Returns: false if the other side already aborted, true otherwise.
 313  */
 314 bool barrier_abort(Barrier *b) {
 315         assert(b);
 316
 317         barrier_write(b, BARRIER_ABORTION);
 318         return !barrier_they_aborted(b);
 319 }
 320
 321 /**
 322  * barrier_wait_next() - Wait for the next barrier of the other side
 323  * @b: barrier to operate on
 324  *
 325  * This waits until the other side places its next barrier. This is independent
 326  * of any barrier-links and just waits for any next barrier of the other side.
 327  *
 328  * If either side aborted, this returns false.
 329  *
 330  * Returns: false if either side aborted, true otherwise.
 331  */
 332 bool barrier_wait_next(Barrier *b) {
 333         assert(b);
 334
 335         if (barrier_is_aborted(b))
 336                 return false;
 337
 338         barrier_read(b, b->barriers - 1);
 339         return !barrier_is_aborted(b);
 340 }
 341
 342 /**
 343  * barrier_wait_abortion() - Wait for the other side to abort
 344  * @b: barrier to operate on
 345  *
 346  * This waits until the other side called barrier_abort(). This can be called
 347  * regardless whether the local side already called barrier_abort() or not.
 348  *
 349  * If the other side has already aborted, this returns immediately.
 350  *
 351  * Returns: false if the local side aborted, true otherwise.
 352  */
 353 bool barrier_wait_abortion(Barrier *b) {
 354         assert(b);
 355
 356         barrier_read(b, BARRIER_THEY_ABORTED);
 357         return !barrier_i_aborted(b);
 358 }
 359
 360 /**
 361  * barrier_sync_next() - Wait for the other side to place a next linked barrier
 362  * @b: barrier to operate on
 363  *
 364  * This is like barrier_wait_next() and waits for the other side to call
 365  * barrier_place(). However, this only waits for linked barriers. That means, if
 366  * the other side already placed more barriers than (or as much as) we did, this
 367  * returns immediately instead of waiting.
 368  *
 369  * If either side aborted, this returns false.
 370  *
 371  * Returns: false if either side aborted, true otherwise.
 372  */
 373 bool barrier_sync_next(Barrier *b) {
 374         assert(b);
 375
 376         if (barrier_is_aborted(b))
 377                 return false;
 378
 379         barrier_read(b, MAX((int64_t)0, b->barriers - 1));
 380         return !barrier_is_aborted(b);
 381 }
 382
 383 /**
 384  * barrier_sync() - Wait for the other side to place as many barriers as we did
 385  * @b: barrier to operate on
 386  *
 387  * This is like barrier_sync_next() but waits for the other side to call
 388  * barrier_place() as often as we did (in total). If they already placed as much
 389  * as we did (or more), this returns immediately instead of waiting.
 390  *
 391  * If either side aborted, this returns false.
 392  *
 393  * Returns: false if either side aborted, true otherwise.
 394  */
 395 bool barrier_sync(Barrier *b) {
 396         assert(b);
 397
 398         if (barrier_is_aborted(b))
 399                 return false;
 400
 401         barrier_read(b, 0);
 402         return !barrier_is_aborted(b);
 403 }