src/basic/barrier.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
   6
   7   systemd is free software; you can redistribute it and/or modify it
   8   under the terms of the GNU Lesser General Public License as published by
   9   the Free Software Foundation; either version 2.1 of the License, or
  10   (at your option) any later version.
  11
  12   systemd is distributed in the hope that it will be useful, but
  13   WITHOUT ANY WARRANTY; without even the implied warranty of
  14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  15   Lesser General Public License for more details.
  16
  17   You should have received a copy of the GNU Lesser General Public License
  18   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  19 ***/
  20
  21 #include <errno.h>
  22 #include <fcntl.h>
  23 #include <poll.h>
  24 #include <stdbool.h>
  25 #include <stdint.h>
  26 #include <stdlib.h>
  27 #include <sys/eventfd.h>
  28 #include <sys/types.h>
  29 #include <unistd.h>
  30
  31 #include "barrier.h"
  32 #include "fd-util.h"
  33 #include "macro.h"
  34
  35 /**
  36  * Barriers
  37  * This barrier implementation provides a simple synchronization method based
  38  * on file-descriptors that can safely be used between threads and processes. A
  39  * barrier object contains 2 shared counters based on eventfd. Both processes
  40  * can now place barriers and wait for the other end to reach a random or
  41  * specific barrier.
  42  * Barriers are numbered, so you can either wait for the other end to reach any
  43  * barrier or the last barrier that you placed. This way, you can use barriers
 * for one-way *and* full synchronization. Note that even though barriers are
  45  * numbered, these numbers are internal and recycled once both sides reached the
  46  * same barrier (implemented as a simple signed counter). It is thus not
  47  * possible to address barriers by their ID.
  48  *
  49  * Barrier-API: Both ends can place as many barriers via barrier_place() as
  50  * they want and each pair of barriers on both sides will be implicitly linked.
  51  * Each side can use the barrier_wait/sync_*() family of calls to wait for the
  52  * other side to place a specific barrier. barrier_wait_next() waits until the
  53  * other side calls barrier_place(). No links between the barriers are
  54  * considered and this simply serves as most basic asynchronous barrier.
  55  * barrier_sync_next() is like barrier_wait_next() and waits for the other side
  56  * to place their next barrier via barrier_place(). However, it only waits for
  57  * barriers that are linked to a barrier we already placed. If the other side
  58  * already placed more barriers than we did, barrier_sync_next() returns
  59  * immediately.
  60  * barrier_sync() extends barrier_sync_next() and waits until the other end
  61  * placed as many barriers via barrier_place() as we did. If they already placed
  62  * as many as we did (or more), it returns immediately.
  63  *
  64  * Additionally to basic barriers, an abortion event is available.
  65  * barrier_abort() places an abortion event that cannot be undone. An abortion
  66  * immediately cancels all placed barriers and replaces them. Any running and
  67  * following wait/sync call besides barrier_wait_abortion() will immediately
  68  * return false on both sides (otherwise, they always return true).
  69  * barrier_abort() can be called multiple times on both ends and will be a
  70  * no-op if already called on this side.
  71  * barrier_wait_abortion() can be used to wait for the other side to call
  72  * barrier_abort() and is the only wait/sync call that does not return
 * immediately if we aborted ourselves. It only returns once the other side
  74  * called barrier_abort().
  75  *
  76  * Barriers can be used for in-process and inter-process synchronization.
  77  * However, for in-process synchronization you could just use mutexes.
  78  * Therefore, main target is IPC and we require both sides to *not* share the FD
  79  * table. If that's given, barriers provide target tracking: If the remote side
  80  * exit()s, an abortion event is implicitly queued on the other side. This way,
  81  * a sync/wait call will be woken up if the remote side crashed or exited
  82  * unexpectedly. However, note that these abortion events are only queued if the
  83  * barrier-queue has been drained. Therefore, it is safe to place a barrier and
  84  * exit. The other side can safely wait on the barrier even though the exit
  85  * queued an abortion event. Usually, the abortion event would overwrite the
  86  * barrier, however, that's not true for exit-abortion events. Those are only
  87  * queued if the barrier-queue is drained (thus, the receiving side has placed
  88  * more barriers than the remote side).
  89  */
  90
  91 /**
  92  * barrier_create() - Initialize a barrier object
  93  * @obj: barrier to initialize
  94  *
  95  * This initializes a barrier object. The caller is responsible of allocating
  96  * the memory and keeping it valid. The memory does not have to be zeroed
  97  * beforehand.
  98  * Two eventfd objects are allocated for each barrier. If allocation fails, an
  99  * error is returned.
 100  *
 101  * If this function fails, the barrier is reset to an invalid state so it is
 102  * safe to call barrier_destroy() on the object regardless whether the
 103  * initialization succeeded or not.
 104  *
 105  * The caller is responsible to destroy the object via barrier_destroy() before
 106  * releasing the underlying memory.
 107  *
 108  * Returns: 0 on success, negative error code on failure.
 109  */
 110 int barrier_create(Barrier *b) {
 111         _cleanup_(barrier_destroyp) Barrier *staging = b;
 112         int r;
 113
 114         assert(b);
 115
 116         b->me = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 117         if (b->me < 0)
 118                 return -errno;
 119
 120         b->them = eventfd(0, EFD_CLOEXEC | EFD_NONBLOCK);
 121         if (b->them < 0)
 122                 return -errno;
 123
 124         r = pipe2(b->pipe, O_CLOEXEC | O_NONBLOCK);
 125         if (r < 0)
 126                 return -errno;
 127
 128         staging = NULL;
 129         return 0;
 130 }
 131
 132 /**
 133  * barrier_destroy() - Destroy a barrier object
 134  * @b: barrier to destroy or NULL
 135  *
 136  * This destroys a barrier object that has previously been passed to
 137  * barrier_create(). The object is released and reset to invalid
 138  * state. Therefore, it is safe to call barrier_destroy() multiple
 139  * times or even if barrier_create() failed. However, barrier must be
 140  * always initialized with BARRIER_NULL.
 141  *
 142  * If @b is NULL, this is a no-op.
 143  */
 144 void barrier_destroy(Barrier *b) {
 145         if (!b)
 146                 return;
 147
 148         b->me = safe_close(b->me);
 149         b->them = safe_close(b->them);
 150         safe_close_pair(b->pipe);
 151         b->barriers = 0;
 152 }
 153
 154 /**
 155  * barrier_set_role() - Set the local role of the barrier
 156  * @b: barrier to operate on
 157  * @role: role to set on the barrier
 158  *
 159  * This sets the roles on a barrier object. This is needed to know
 160  * which side of the barrier you're on. Usually, the parent creates
 161  * the barrier via barrier_create() and then calls fork() or clone().
 162  * Therefore, the FDs are duplicated and the child retains the same
 163  * barrier object.
 164  *
 165  * Both sides need to call barrier_set_role() after fork() or clone()
 166  * are done. If this is not done, barriers will not work correctly.
 167  *
 168  * Note that barriers could be supported without fork() or clone(). However,
 169  * this is currently not needed so it hasn't been implemented.
 170  */
 171 void barrier_set_role(Barrier *b, unsigned int role) {
 172         int fd;
 173
 174         assert(b);
 175         assert(IN_SET(role, BARRIER_PARENT, BARRIER_CHILD));
 176         /* make sure this is only called once */
 177         assert(b->pipe[0] >= 0 && b->pipe[1] >= 0);
 178
 179         if (role == BARRIER_PARENT)
 180                 b->pipe[1] = safe_close(b->pipe[1]);
 181         else {
 182                 b->pipe[0] = safe_close(b->pipe[0]);
 183
 184                 /* swap me/them for children */
 185                 fd = b->me;
 186                 b->me = b->them;
 187                 b->them = fd;
 188         }
 189 }
 190
 191 /* places barrier; returns false if we aborted, otherwise true */
 192 static bool barrier_write(Barrier *b, uint64_t buf) {
 193         ssize_t len;
 194
 195         /* prevent new sync-points if we already aborted */
 196         if (barrier_i_aborted(b))
 197                 return false;
 198
 199         assert(b->me >= 0);
 200         do {
 201                 len = write(b->me, &buf, sizeof(buf));
 202         } while (len < 0 && IN_SET(errno, EAGAIN, EINTR));
 203
 204         if (len != sizeof(buf))
 205                 goto error;
 206
 207         /* lock if we aborted */
 208         if (buf >= (uint64_t)BARRIER_ABORTION) {
 209                 if (barrier_they_aborted(b))
 210                         b->barriers = BARRIER_WE_ABORTED;
 211                 else
 212                         b->barriers = BARRIER_I_ABORTED;
 213         } else if (!barrier_is_aborted(b))
 214                 b->barriers += buf;
 215
 216         return !barrier_i_aborted(b);
 217
 218 error:
 219         /* If there is an unexpected error, we have to make this fatal. There
 220          * is no way we can recover from sync-errors. Therefore, we close the
 221          * pipe-ends and treat this as abortion. The other end will notice the
 222          * pipe-close and treat it as abortion, too. */
 223
 224         safe_close_pair(b->pipe);
 225         b->barriers = BARRIER_WE_ABORTED;
 226         return false;
 227 }
 228
 229 /* waits for barriers; returns false if they aborted, otherwise true */
 230 static bool barrier_read(Barrier *b, int64_t comp) {
 231         if (barrier_they_aborted(b))
 232                 return false;
 233
 234         while (b->barriers > comp) {
 235                 struct pollfd pfd[2] = {
 236                         { .fd = b->pipe[0] >= 0 ? b->pipe[0] : b->pipe[1],
 237                           .events = POLLHUP },
 238                         { .fd = b->them,
 239                           .events = POLLIN }};
 240                 uint64_t buf;
 241                 int r;
 242
 243                 r = poll(pfd, 2, -1);
 244                 if (r < 0 && IN_SET(errno, EAGAIN, EINTR))
 245                         continue;
 246                 else if (r < 0)
 247                         goto error;
 248
 249                 if (pfd[1].revents) {
 250                         ssize_t len;
 251
 252                         /* events on @them signal new data for us */
 253                         len = read(b->them, &buf, sizeof(buf));
 254                         if (len < 0 && IN_SET(errno, EAGAIN, EINTR))
 255                                 continue;
 256
 257                         if (len != sizeof(buf))
 258                                 goto error;
 259                 } else if (pfd[0].revents & (POLLHUP | POLLERR | POLLNVAL))
 260                         /* POLLHUP on the pipe tells us the other side exited.
 261                          * We treat this as implicit abortion. But we only
 262                          * handle it if there's no event on the eventfd. This
 263                          * guarantees that exit-abortions do not overwrite real
 264                          * barriers. */
 265                         buf = BARRIER_ABORTION;
 266                 else
 267                         continue;
 268
 269                 /* lock if they aborted */
 270                 if (buf >= (uint64_t)BARRIER_ABORTION) {
 271                         if (barrier_i_aborted(b))
 272                                 b->barriers = BARRIER_WE_ABORTED;
 273                         else
 274                                 b->barriers = BARRIER_THEY_ABORTED;
 275                 } else if (!barrier_is_aborted(b))
 276                         b->barriers -= buf;
 277         }
 278
 279         return !barrier_they_aborted(b);
 280
 281 error:
 282         /* If there is an unexpected error, we have to make this fatal. There
 283          * is no way we can recover from sync-errors. Therefore, we close the
 284          * pipe-ends and treat this as abortion. The other end will notice the
 285          * pipe-close and treat it as abortion, too. */
 286
 287         safe_close_pair(b->pipe);
 288         b->barriers = BARRIER_WE_ABORTED;
 289         return false;
 290 }
 291
 292 /**
 293  * barrier_place() - Place a new barrier
 294  * @b: barrier object
 295  *
 296  * This places a new barrier on the barrier object. If either side already
 297  * aborted, this is a no-op and returns "false". Otherwise, the barrier is
 298  * placed and this returns "true".
 299  *
 300  * Returns: true if barrier was placed, false if either side aborted.
 301  */
 302 bool barrier_place(Barrier *b) {
 303         assert(b);
 304
 305         if (barrier_is_aborted(b))
 306                 return false;
 307
 308         barrier_write(b, BARRIER_SINGLE);
 309         return true;
 310 }
 311
 312 /**
 313  * barrier_abort() - Abort the synchronization
 314  * @b: barrier object to abort
 315  *
 316  * This aborts the barrier-synchronization. If barrier_abort() was already
 317  * called on this side, this is a no-op. Otherwise, the barrier is put into the
 318  * ABORT-state and will stay there. The other side is notified about the
 319  * abortion. Any following attempt to place normal barriers or to wait on normal
 320  * barriers will return immediately as "false".
 321  *
 322  * You can wait for the other side to call barrier_abort(), too. Use
 323  * barrier_wait_abortion() for that.
 324  *
 325  * Returns: false if the other side already aborted, true otherwise.
 326  */
 327 bool barrier_abort(Barrier *b) {
 328         assert(b);
 329
 330         barrier_write(b, BARRIER_ABORTION);
 331         return !barrier_they_aborted(b);
 332 }
 333
 334 /**
 335  * barrier_wait_next() - Wait for the next barrier of the other side
 336  * @b: barrier to operate on
 337  *
 338  * This waits until the other side places its next barrier. This is independent
 339  * of any barrier-links and just waits for any next barrier of the other side.
 340  *
 341  * If either side aborted, this returns false.
 342  *
 343  * Returns: false if either side aborted, true otherwise.
 344  */
 345 bool barrier_wait_next(Barrier *b) {
 346         assert(b);
 347
 348         if (barrier_is_aborted(b))
 349                 return false;
 350
 351         barrier_read(b, b->barriers - 1);
 352         return !barrier_is_aborted(b);
 353 }
 354
 355 /**
 356  * barrier_wait_abortion() - Wait for the other side to abort
 357  * @b: barrier to operate on
 358  *
 359  * This waits until the other side called barrier_abort(). This can be called
 360  * regardless whether the local side already called barrier_abort() or not.
 361  *
 362  * If the other side has already aborted, this returns immediately.
 363  *
 364  * Returns: false if the local side aborted, true otherwise.
 365  */
 366 bool barrier_wait_abortion(Barrier *b) {
 367         assert(b);
 368
 369         barrier_read(b, BARRIER_THEY_ABORTED);
 370         return !barrier_i_aborted(b);
 371 }
 372
 373 /**
 374  * barrier_sync_next() - Wait for the other side to place a next linked barrier
 375  * @b: barrier to operate on
 376  *
 377  * This is like barrier_wait_next() and waits for the other side to call
 378  * barrier_place(). However, this only waits for linked barriers. That means, if
 379  * the other side already placed more barriers than (or as much as) we did, this
 380  * returns immediately instead of waiting.
 381  *
 382  * If either side aborted, this returns false.
 383  *
 384  * Returns: false if either side aborted, true otherwise.
 385  */
 386 bool barrier_sync_next(Barrier *b) {
 387         assert(b);
 388
 389         if (barrier_is_aborted(b))
 390                 return false;
 391
 392         barrier_read(b, MAX((int64_t)0, b->barriers - 1));
 393         return !barrier_is_aborted(b);
 394 }
 395
 396 /**
 397  * barrier_sync() - Wait for the other side to place as many barriers as we did
 398  * @b: barrier to operate on
 399  *
 400  * This is like barrier_sync_next() but waits for the other side to call
 401  * barrier_place() as often as we did (in total). If they already placed as much
 402  * as we did (or more), this returns immediately instead of waiting.
 403  *
 404  * If either side aborted, this returns false.
 405  *
 406  * Returns: false if either side aborted, true otherwise.
 407  */
 408 bool barrier_sync(Barrier *b) {
 409         assert(b);
 410
 411         if (barrier_is_aborted(b))
 412                 return false;
 413
 414         barrier_read(b, 0);
 415         return !barrier_is_aborted(b);
 416 }