src/test/test-barrier.c

   1 /* SPDX-License-Identifier: LGPL-2.1+ */
   2 /***
   3   This file is part of systemd.
   4
   5   Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
   6 ***/
   7
   8 /*
   9  * IPC barrier tests
  10  * These tests verify the correct behavior of the IPC Barrier implementation.
  11  * Note that the tests use alarm-timers to verify dead-locks and timeouts. These
  12  * might not work on slow machines where 20ms are too short to perform specific
  13  * operations (though very unlikely). If that turns out to be true, we have to
  14  * increase it at the slight cost of a longer test duration on other machines.
  15  */
  16
  17 #include <stdio.h>
  18 #include <sys/time.h>
  19 #include <sys/wait.h>
  20 #include <unistd.h>
  21
  22 #include "barrier.h"
  23 #include "util.h"
  24
  25 /* Base time unit (20ms, in usec) for the timing in these tests: the alarm and
  26  * sleep durations are multiples of this constant. If it is too short for slow
  27  * machines to perform the requested operations, we have to increase it. On an
  28  * i7 this works fine with a 1ms base time, so 20ms should be fine for everyone. */
  29 #define BASE_TIME (20 * USEC_PER_MSEC)
  30
  31 static void set_alarm(usec_t usecs) {
  32         struct itimerval v = { };
  33
  34         timeval_store(&v.it_value, usecs);
  35         assert_se(setitimer(ITIMER_REAL, &v, NULL) >= 0);
  36 }
  37
  38 static void sleep_for(usec_t usecs) {
  39         /* stupid usleep() might fail if >1000000 */
  40         assert_se(usecs < USEC_PER_SEC);
  41         usleep(usecs);
  42 }
  43
     /*
      * Defines a test function _FUNCTION() that runs two forked processes against
      * one shared Barrier.
      *
      * After creating the barrier, the function forks twice. The first process
      * (pid1) takes the BARRIER_CHILD role and runs _CHILD_CODE, the second (pid2)
      * takes the BARRIER_PARENT role and runs _PARENT_CODE; each exits with status
      * 42 if its code block returns. The original process then destroys its own
      * copy of the barrier and evaluates _WAIT_CHILD and _WAIT_PARENT while a
      * 999999us alarm is armed, which is cleared again afterwards.
      *
      * All four code arguments are expanded inside the function body, so they can
      * refer to the locals 'b', 'pid1' and 'pid2'.
      */
  44 #define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT)  \
  45         static void _FUNCTION(void) {                                   \
  46                 Barrier b = BARRIER_NULL;                               \
  47                 pid_t pid1, pid2;                                       \
  48                                                                         \
  49                 assert_se(barrier_create(&b) >= 0);                     \
  50                 assert_se(b.me > 0);                                    \
  51                 assert_se(b.them > 0);                                  \
  52                 assert_se(b.pipe[0] > 0);                               \
  53                 assert_se(b.pipe[1] > 0);                               \
  54                                                                         \
  55                 pid1 = fork();                                          \
  56                 assert_se(pid1 >= 0);                                   \
  57                 if (pid1 == 0) {                                        \
  58                         barrier_set_role(&b, BARRIER_CHILD);            \
  59                         { _CHILD_CODE; }                                \
  60                         exit(42);                                       \
  61                 }                                                       \
  62                                                                         \
  63                 pid2 = fork();                                          \
  64                 assert_se(pid2 >= 0);                                   \
  65                 if (pid2 == 0) {                                        \
  66                         barrier_set_role(&b, BARRIER_PARENT);           \
  67                         { _PARENT_CODE; }                               \
  68                         exit(42);                                       \
  69                 }                                                       \
  70                                                                         \
  71                 barrier_destroy(&b);                                    \
  72                 set_alarm(999999);                                      \
  73                 { _WAIT_CHILD; }                                        \
  74                 { _WAIT_PARENT; }                                       \
  75                 set_alarm(0);                                           \
  76         }
  77
  78 #define TEST_BARRIER_WAIT_SUCCESS(_pid) \
  79                 ({                                                      \
  80                         int pidr, status;                               \
  81                         pidr = waitpid(_pid, &status, 0);               \
  82                         assert_se(pidr == _pid);                        \
  83                         assert_se(WIFEXITED(status));                   \
  84                         assert_se(WEXITSTATUS(status) == 42);           \
  85                 })
  86
  87 #define TEST_BARRIER_WAIT_ALARM(_pid) \
  88                 ({                                                      \
  89                         int pidr, status;                               \
  90                         pidr = waitpid(_pid, &status, 0);               \
  91                         assert_se(pidr == _pid);                        \
  92                         assert_se(WIFSIGNALED(status));                 \
  93                         assert_se(WTERMSIG(status) == SIGALRM);         \
  94                 })
  95
  96 /*
  97  * Test basic sync points
  98  * Both processes place a barrier and then wait synchronously for the other
  99  * side. The alarms make sure a broken sync cannot hang the test. The
 100  * sleep_for() in the child makes sure the exit of the parent does not overwrite
 101  * previous barriers. Due to the sleep_for(), we know that the parent already
 102  * exited, thus there's a pending HUP on the pipe. However, barrier_sync()
 103  * prefers reads on the eventfd, thus we can safely wait on the barrier.
 104  */
 105 TEST_BARRIER(test_barrier_sync,
 106         ({ /* child (pid1): place, linger, then sync */
 107                 set_alarm(BASE_TIME * 10);
 108                 assert_se(barrier_place(&b));
 109                 sleep_for(BASE_TIME * 2);
 110                 assert_se(barrier_sync(&b));
 111         }),
 112         TEST_BARRIER_WAIT_SUCCESS(pid1),
 113         ({ /* parent (pid2): place and sync right away */
 114                 set_alarm(BASE_TIME * 10);
 115                 assert_se(barrier_place(&b));
 116                 assert_se(barrier_sync(&b));
 117         }),
 118         TEST_BARRIER_WAIT_SUCCESS(pid2));
 119
 120 /*
 121  * Test wait_next()
 122  * This places a barrier in the parent and syncs on it. The child sleeps while
 123  * the parent places the barrier and then waits for a barrier. The wait will
 124  * succeed as the child hasn't read the parent's barrier, yet. The following
 125  * barrier and sync synchronize the exit.
 126  */
 127 TEST_BARRIER(test_barrier_wait_next,
 128         ({ /* child (pid1): sleep first, then wait for the parent's barrier */
 129                 sleep_for(BASE_TIME);
 130                 set_alarm(BASE_TIME * 10);
 131                 assert_se(barrier_wait_next(&b));
 132                 assert_se(barrier_place(&b));
 133                 assert_se(barrier_sync(&b));
 134         }),
 135         TEST_BARRIER_WAIT_SUCCESS(pid1),
 136         ({ /* parent (pid2): place a barrier and sync on it */
 137                 set_alarm(BASE_TIME * 4);
 138                 assert_se(barrier_place(&b));
 139                 assert_se(barrier_sync(&b));
 140         }),
 141         TEST_BARRIER_WAIT_SUCCESS(pid2));
 142
 143 /*
 144  * Test wait_next() multiple times
 145  * This places two barriers in the parent and waits for the child to exit. The
 146  * child sleeps for BASE_TIME so both barriers _should_ be in place. It then
 147  * waits for the parent to place the next barrier twice. The first call will
 148  * fetch both barriers and return. However, the second call will stall as the
 149  * parent does not place a 3rd barrier (the sleep caught two barriers).
 150  * wait_next() does not look at barrier-links so this stall is expected. Thus
 151  * the child times out and is expected to be killed by its alarm.
 152  */
 153 TEST_BARRIER(test_barrier_wait_next_twice,
 154         ({ /* child (pid1): the second wait_next() is expected to stall */
 155                 sleep_for(BASE_TIME);
 156                 set_alarm(BASE_TIME);
 157                 assert_se(barrier_wait_next(&b));
 158                 assert_se(barrier_wait_next(&b));
 159                 assert_se(0); /* must not be reached; SIGALRM is expected first */
 160         }),
 161         TEST_BARRIER_WAIT_ALARM(pid1),
 162         ({ /* parent (pid2): place two barriers, then linger before exiting */
 163                 set_alarm(BASE_TIME * 10);
 164                 assert_se(barrier_place(&b));
 165                 assert_se(barrier_place(&b));
 166                 sleep_for(BASE_TIME * 4);
 167         }),
 168         TEST_BARRIER_WAIT_SUCCESS(pid2));
 169
 170 /*
 171  * Test wait_next() with local barriers
 172  * This is the same as test_barrier_wait_next_twice, but places local barriers
 173  * between both waits. This does not have any effect on the wait so it times out
 174  * like the other test.
 175  */
 176 TEST_BARRIER(test_barrier_wait_next_twice_local,
 177         ({ /* child (pid1): local barriers between the waits; second wait stalls */
 178                 sleep_for(BASE_TIME);
 179                 set_alarm(BASE_TIME);
 180                 assert_se(barrier_wait_next(&b));
 181                 assert_se(barrier_place(&b));
 182                 assert_se(barrier_place(&b));
 183                 assert_se(barrier_wait_next(&b));
 184                 assert_se(0); /* must not be reached; SIGALRM is expected first */
 185         }),
 186         TEST_BARRIER_WAIT_ALARM(pid1),
 187         ({ /* parent (pid2): place two barriers, then linger before exiting */
 188                 set_alarm(BASE_TIME * 10);
 189                 assert_se(barrier_place(&b));
 190                 assert_se(barrier_place(&b));
 191                 sleep_for(BASE_TIME * 4);
 192         }),
 193         TEST_BARRIER_WAIT_SUCCESS(pid2));
 194
 195 /*
 196  * Test wait_next() with sync_next()
 197  * This is again the same as test_barrier_wait_next_twice but uses a
 198  * synced wait as the second wait. This works just fine because the local state
 199  * has no barriers placed, therefore, the remote is always in sync.
 200  */
 201 TEST_BARRIER(test_barrier_wait_next_twice_sync,
 202         ({ /* child (pid1): wait_next() followed by sync_next(); both must succeed */
 203                 sleep_for(BASE_TIME);
 204                 set_alarm(BASE_TIME);
 205                 assert_se(barrier_wait_next(&b));
 206                 assert_se(barrier_sync_next(&b));
 207         }),
 208         TEST_BARRIER_WAIT_SUCCESS(pid1),
 209         ({ /* parent (pid2): place two barriers and exit */
 210                 set_alarm(BASE_TIME * 10);
 211                 assert_se(barrier_place(&b));
 212                 assert_se(barrier_place(&b));
 213         }),
 214         TEST_BARRIER_WAIT_SUCCESS(pid2));
 215
 216 /*
 217  * Test wait_next() with sync_next() and local barriers
 218  * This is again the same as test_barrier_wait_next_twice_local but uses a
 219  * synced wait as the second wait. This works just fine because the local state
 220  * is in sync with the remote.
 221  */
 222 TEST_BARRIER(test_barrier_wait_next_twice_local_sync,
 223         ({ /* child (pid1): two local barriers, then sync_next(); must succeed */
 224                 sleep_for(BASE_TIME);
 225                 set_alarm(BASE_TIME);
 226                 assert_se(barrier_wait_next(&b));
 227                 assert_se(barrier_place(&b));
 228                 assert_se(barrier_place(&b));
 229                 assert_se(barrier_sync_next(&b));
 230         }),
 231         TEST_BARRIER_WAIT_SUCCESS(pid1),
 232         ({ /* parent (pid2): place two barriers and exit */
 233                 set_alarm(BASE_TIME * 10);
 234                 assert_se(barrier_place(&b));
 235                 assert_se(barrier_place(&b));
 236         }),
 237         TEST_BARRIER_WAIT_SUCCESS(pid2));
 238
 239 /*
 240  * Test sync_next() and sync()
 241  * This tests sync_*() synchronizations and makes sure they work fine if the
 242  * local state is behind the remote state.
 243  */
 244 TEST_BARRIER(test_barrier_sync_next,
 245         ({ /* child (pid1): sync before and after placing two local barriers */
 246                 set_alarm(BASE_TIME * 10);
 247                 assert_se(barrier_sync_next(&b));
 248                 assert_se(barrier_sync(&b));
 249                 assert_se(barrier_place(&b));
 250                 assert_se(barrier_place(&b));
 251                 assert_se(barrier_sync_next(&b));
 252                 assert_se(barrier_sync_next(&b));
 253                 assert_se(barrier_sync(&b));
 254         }),
 255         TEST_BARRIER_WAIT_SUCCESS(pid1),
 256         ({ /* parent (pid2): sleep, place two barriers, then sync */
 257                 set_alarm(BASE_TIME * 10);
 258                 sleep_for(BASE_TIME);
 259                 assert_se(barrier_place(&b));
 260                 assert_se(barrier_place(&b));
 261                 assert_se(barrier_sync(&b));
 262         }),
 263         TEST_BARRIER_WAIT_SUCCESS(pid2));
 264
 265 /*
 266  * Test sync_next() and sync() with local barriers
 267  * This tests that sync_*() times out if local barriers are placed but the
 268  * remote didn't place any.
 269  */
 270 TEST_BARRIER(test_barrier_sync_next_local,
 271         ({ /* child (pid1): place locally, then sync_next(); expected to stall */
 272                 set_alarm(BASE_TIME);
 273                 assert_se(barrier_place(&b));
 274                 assert_se(barrier_sync_next(&b));
 275                 assert_se(0); /* must not be reached; SIGALRM is expected first */
 276         }),
 277         TEST_BARRIER_WAIT_ALARM(pid1),
 278         ({ /* parent (pid2): places nothing, only sleeps */
 279                 sleep_for(BASE_TIME * 2);
 280         }),
 281         TEST_BARRIER_WAIT_SUCCESS(pid2));
 282
 283 /*
 284  * Test sync_next() and sync() with local barriers and abortion
 285  * This is the same as test_barrier_sync_next_local but the parent aborts the
 286  * barrier. The child's sync_next() thus returns false instead of timing out.
 287  */
 288 TEST_BARRIER(test_barrier_sync_next_local_abort,
 289         ({ /* child (pid1): sync_next() must return false and the child exit normally */
 290                 set_alarm(BASE_TIME * 10);
 291                 assert_se(barrier_place(&b));
 292                 assert_se(!barrier_sync_next(&b));
 293         }),
 294         TEST_BARRIER_WAIT_SUCCESS(pid1),
 295         ({ /* parent (pid2): abort the barrier */
 296                 assert_se(barrier_abort(&b));
 297         }),
 298         TEST_BARRIER_WAIT_SUCCESS(pid2));
 299
 300 /*
 301  * Test matched wait_abortion()
 302  * This runs wait_abortion() with remote abortion.
 303  */
 304 TEST_BARRIER(test_barrier_wait_abortion,
 305         ({ /* child (pid1): wait for the abortion; must return true */
 306                 set_alarm(BASE_TIME * 10);
 307                 assert_se(barrier_wait_abortion(&b));
 308         }),
 309         TEST_BARRIER_WAIT_SUCCESS(pid1),
 310         ({ /* parent (pid2): abort the barrier */
 311                 assert_se(barrier_abort(&b));
 312         }),
 313         TEST_BARRIER_WAIT_SUCCESS(pid2));
 314
 315 /*
 316  * Test unmatched wait_abortion()
 317  * This runs wait_abortion() without any remote abortion going on. It thus must
 318  * time out.
 319  */
 320 TEST_BARRIER(test_barrier_wait_abortion_unmatched,
 321         ({ /* child (pid1): nobody aborts, so wait_abortion() is expected to stall */
 322                 set_alarm(BASE_TIME);
 323                 assert_se(barrier_wait_abortion(&b));
 324                 assert_se(0); /* must not be reached; SIGALRM is expected first */
 325         }),
 326         TEST_BARRIER_WAIT_ALARM(pid1),
 327         ({ /* parent (pid2): does not abort, only sleeps */
 328                 sleep_for(BASE_TIME * 2);
 329         }),
 330         TEST_BARRIER_WAIT_SUCCESS(pid2));
 331
 332 /*
 333  * Test matched wait_abortion() with local abortion
 334  * This runs wait_abortion() with local and remote abortion.
 335  */
 336 TEST_BARRIER(test_barrier_wait_abortion_local,
 337         ({ /* child (pid1): abort locally; wait_abortion() must then return false */
 338                 set_alarm(BASE_TIME * 10);
 339                 assert_se(barrier_abort(&b));
 340                 assert_se(!barrier_wait_abortion(&b));
 341         }),
 342         TEST_BARRIER_WAIT_SUCCESS(pid1),
 343         ({ /* parent (pid2): abort the barrier as well */
 344                 assert_se(barrier_abort(&b));
 345         }),
 346         TEST_BARRIER_WAIT_SUCCESS(pid2));
 347
 348 /*
 349  * Test unmatched wait_abortion() with local abortion
 350  * This runs wait_abortion() with only local abortion. This must time out.
 351  */
 352 TEST_BARRIER(test_barrier_wait_abortion_local_unmatched,
 353         ({ /* child (pid1): only the local side aborts; the wait is expected to stall */
 354                 set_alarm(BASE_TIME);
 355                 assert_se(barrier_abort(&b));
 356                 assert_se(!barrier_wait_abortion(&b));
 357                 assert_se(0); /* must not be reached; SIGALRM is expected first */
 358         }),
 359         TEST_BARRIER_WAIT_ALARM(pid1),
 360         ({ /* parent (pid2): does not abort, only sleeps */
 361                 sleep_for(BASE_TIME * 2);
 362         }),
 363         TEST_BARRIER_WAIT_SUCCESS(pid2));
 364
 365 /*
 366  * Test child exit
 367  * The parent places a barrier and syncs with the child. The child only
 368  * exit()s, which should cause an implicit abortion and wake the parent.
 369  */
 370 TEST_BARRIER(test_barrier_exit,
 371         ({ /* child (pid1): nothing to do, exits right away */
 372         }),
 373         TEST_BARRIER_WAIT_SUCCESS(pid1),
 374         ({ /* parent (pid2): the sync must return false */
 375                 set_alarm(BASE_TIME * 10);
 376                 assert_se(barrier_place(&b));
 377                 assert_se(!barrier_sync(&b));
 378         }),
 379         TEST_BARRIER_WAIT_SUCCESS(pid2));
 380
 381 /*
 382  * Test child exit with sleep
 383  * Same as test_barrier_exit but verifies the test really works due to the
 384  * child-exit. The child now sleeps before exiting, which lets the alarm in the
 385  * parent fire first and causes the parent to time out.
 386  */
 387 TEST_BARRIER(test_barrier_no_exit,
 388         ({ /* child (pid1): stay alive for a while before exiting */
 389                 sleep_for(BASE_TIME * 2);
 390         }),
 391         TEST_BARRIER_WAIT_SUCCESS(pid1),
 392         ({ /* parent (pid2): the sync is expected to stall until SIGALRM */
 393                 set_alarm(BASE_TIME);
 394                 assert_se(barrier_place(&b));
 395                 assert_se(!barrier_sync(&b));
 396         }),
 397         TEST_BARRIER_WAIT_ALARM(pid2));
 398
 399 /*
 400  * Test pending exit against sync
 401  * The parent places a barrier *and* exits. The sleep_for() in the child
 402  * guarantees both are pending. However, our logic prefers pending barriers over
 403  * pending exit-abortions (unlike normal abortions), thus the wait_next() must
 404  * succeed, same for the sync_next() as our local barrier-count is smaller than
 405  * the remote. Once we place a barrier our count is equal, so the sync still
 406  * succeeds. Only if we place one more barrier, we're ahead of the remote, thus
 407  * we will fail due to HUP on the pipe.
 408  */
 409 TEST_BARRIER(test_barrier_pending_exit,
 410         ({ /* child (pid1): only the last sync_next() may return false */
 411                 set_alarm(BASE_TIME * 4);
 412                 sleep_for(BASE_TIME * 2);
 413                 assert_se(barrier_wait_next(&b));
 414                 assert_se(barrier_sync_next(&b));
 415                 assert_se(barrier_place(&b));
 416                 assert_se(barrier_sync_next(&b));
 417                 assert_se(barrier_place(&b));
 418                 assert_se(!barrier_sync_next(&b));
 419         }),
 420         TEST_BARRIER_WAIT_SUCCESS(pid1),
 421         ({ /* parent (pid2): place one barrier and exit */
 422                 assert_se(barrier_place(&b));
 423         }),
 424         TEST_BARRIER_WAIT_SUCCESS(pid2));
 425
 426 int main(int argc, char *argv[]) {
 427         /*
 428          * This test uses real-time alarms and sleeps to test for CPU races
 429          * explicitly. This is highly fragile if your system is under load. We
 430          * already increased the BASE_TIME value to make the tests more robust,
 431          * but that just makes the test take significantly longer. Hence,
 432          * disable the test by default, so it will not break CI.
 433          */
 434         if (argc < 2)
 435                 return EXIT_TEST_SKIP;
 436
 437         log_parse_environment();
 438         log_open();
 439
 440         test_barrier_sync();
 441         test_barrier_wait_next();
 442         test_barrier_wait_next_twice();
 443         test_barrier_wait_next_twice_sync();
 444         test_barrier_wait_next_twice_local();
 445         test_barrier_wait_next_twice_local_sync();
 446         test_barrier_sync_next();
 447         test_barrier_sync_next_local();
 448         test_barrier_sync_next_local_abort();
 449         test_barrier_wait_abortion();
 450         test_barrier_wait_abortion_unmatched();
 451         test_barrier_wait_abortion_local();
 452         test_barrier_wait_abortion_local_unmatched();
 453         test_barrier_exit();
 454         test_barrier_no_exit();
 455         test_barrier_pending_exit();
 456
 457         return 0;
 458 }