src/test/test-barrier.c

   1 /***
   2   This file is part of systemd.
   3
   4   Copyright 2014 David Herrmann <dh.herrmann@gmail.com>
   5
   6   systemd is free software; you can redistribute it and/or modify it
   7   under the terms of the GNU Lesser General Public License as published by
   8   the Free Software Foundation; either version 2.1 of the License, or
   9   (at your option) any later version.
  10
  11   systemd is distributed in the hope that it will be useful, but
  12   WITHOUT ANY WARRANTY; without even the implied warranty of
  13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  14   Lesser General Public License for more details.
  15
  16   You should have received a copy of the GNU Lesser General Public License
  17   along with systemd; If not, see <http://www.gnu.org/licenses/>.
  18 ***/
  19
  20 /*
  21  * IPC barrier tests
  22  * These tests verify the correct behavior of the IPC Barrier implementation.
  23  * Note that the tests use alarm-timers to verify dead-locks and timeouts. These
  24  * might not work on slow machines where 20ms are too short to perform specific
  25  * operations (though that is very unlikely). If that turns out to be true, we have
  26  * to increase it, at the slight cost of lengthening the test duration on other machines.
  27  */
  28
  29 #include <stdio.h>
  30 #include <sys/time.h>
  31 #include <sys/wait.h>
  32 #include <unistd.h>
  33
  34 #include "barrier.h"
  35 #include "util.h"
  36
  37 /* Base unit for the per-test timings: the alarm and sleep timers used by the
  38  * test cases below are multiples of this constant. If 20ms is too short for slow
  39  * machines to perform the requested operations, we have to increase it. On an
  40  * i7 this works fine with 1ms base-time, so 20ms should be just fine for everyone. */
  41 #define BASE_TIME (20 * USEC_PER_MSEC)
  42
  43 static void set_alarm(usec_t usecs) {
  44         /* Program ITIMER_REAL to expire after usecs; it_interval stays zero, so it does not re-arm. */
  45         struct itimerval timer = { };
  46         timeval_store(&timer.it_value, usecs);
  47         assert_se(setitimer(ITIMER_REAL, &timer, NULL) >= 0);
  48 }
  49
  50 static void sleep_for(usec_t usecs) {
  51         /* usleep() may fail for intervals of one second or more, so insist on less */
  52         assert_se(USEC_PER_SEC > usecs);
  53         (void) usleep(usecs);
  54 }
  55
  56 #define TEST_BARRIER(_FUNCTION, _CHILD_CODE, _WAIT_CHILD, _PARENT_CODE, _WAIT_PARENT)  \
  57         static void _FUNCTION(void) { /* runs two forked processes against one shared barrier */ \
  58                 Barrier b = BARRIER_NULL;                               \
  59                 pid_t pid1, pid2;                                       \
  60                 /* create the barrier before forking so that both processes inherit it */ \
  61                 assert_se(barrier_create(&b) >= 0);                     \
  62                 assert_se(b.me >= 0); /* 0 is a valid descriptor, e.g. if stdin is closed */ \
  63                 assert_se(b.them >= 0);                                 \
  64                 assert_se(b.pipe[0] >= 0);                              \
  65                 assert_se(b.pipe[1] >= 0);                              \
  66                 /* first process: _CHILD_CODE runs in the BARRIER_CHILD role */ \
  67                 pid1 = fork();                                          \
  68                 assert_se(pid1 >= 0);                                   \
  69                 if (pid1 == 0) {                                        \
  70                         barrier_set_role(&b, BARRIER_CHILD);            \
  71                         { _CHILD_CODE; }                                \
  72                         exit(42); /* the status TEST_BARRIER_WAIT_SUCCESS checks for */ \
  73                 }                                                       \
  74                 /* second process: _PARENT_CODE runs in the BARRIER_PARENT role */ \
  75                 pid2 = fork();                                          \
  76                 assert_se(pid2 >= 0);                                   \
  77                 if (pid2 == 0) {                                        \
  78                         barrier_set_role(&b, BARRIER_PARENT);           \
  79                         { _PARENT_CODE; }                               \
  80                         exit(42); /* the status TEST_BARRIER_WAIT_SUCCESS checks for */ \
  81                 }                                                       \
  82                 /* the runner only waits: drop its barrier copy, bound the wait to just under 1s */ \
  83                 barrier_destroy(&b);                                    \
  84                 set_alarm(999999);                                      \
  85                 { _WAIT_CHILD; }                                        \
  86                 { _WAIT_PARENT; }                                       \
  87                 set_alarm(0); /* both waits finished in time: cancel the alarm */ \
  88         }
  89
  90 #define TEST_BARRIER_WAIT_SUCCESS(_pid) \
  91                 ({ /* reap _pid; it must have exited normally with status 42 */ \
  92                         int status;                                     \
  93                         pid_t pidr = waitpid((_pid), &status, 0);       \
  94                         assert_se(pidr == (_pid)); /* _pid is expanded twice */ \
  95                         assert_se(WIFEXITED(status));                   \
  96                         assert_se(WEXITSTATUS(status) == 42);           \
  97                 })
  98
  99 #define TEST_BARRIER_WAIT_ALARM(_pid) \
 100                 ({ /* reap _pid; it must have been terminated by SIGALRM */ \
 101                         int status;                                     \
 102                         pid_t pidr = waitpid((_pid), &status, 0);       \
 103                         assert_se(pidr == (_pid)); /* _pid is expanded twice */ \
 104                         assert_se(WIFSIGNALED(status));                 \
 105                         assert_se(WTERMSIG(status) == SIGALRM);         \
 106                 })
 107
 108 /*
 109  * Test basic sync points
 110  * This places a barrier in both processes and waits synchronously for them.
 111  * The timeout makes sure the sync works as expected. The sleep_for() in the child
 112  * makes sure the exit of the parent does not overwrite previous barriers. Due
 113  * to the sleep_for(), we know that the parent already exited, thus there's a
 114  * pending HUP on the pipe. However, the barrier_sync() prefers reads on the
 115  * eventfd, thus we can safely wait on the barrier.
 116  */
 117 TEST_BARRIER(test_barrier_sync,
 118         ({ /* child */
 119                 set_alarm(BASE_TIME * 10); /* a stalled sync ends in SIGALRM and fails the test */
 120                 assert_se(barrier_place(&b));
 121                 sleep_for(BASE_TIME * 2); /* give the parent time to sync and exit first */
 122                 assert_se(barrier_sync(&b));
 123         }),
 124         TEST_BARRIER_WAIT_SUCCESS(pid1),
 125         ({ /* parent */
 126                 set_alarm(BASE_TIME * 10);
 127                 assert_se(barrier_place(&b));
 128                 assert_se(barrier_sync(&b));
 129         }),
 130         TEST_BARRIER_WAIT_SUCCESS(pid2));
 131
 132 /*
 133  * Test wait_next()
 134  * This places a barrier in the parent and syncs on it. The child sleeps while
 135  * the parent places the barrier and then waits for a barrier. The wait will
 136  * succeed as the child hasn't read the parent's barrier, yet. The following
 137  * barrier and sync synchronize the exit.
 138  */
 139 TEST_BARRIER(test_barrier_wait_next,
 140         ({ /* child */
 141                 sleep_for(BASE_TIME); /* let the parent place its barrier first */
 142                 set_alarm(BASE_TIME * 10);
 143                 assert_se(barrier_wait_next(&b));
 144                 assert_se(barrier_place(&b));
 145                 assert_se(barrier_sync(&b));
 146         }),
 147         TEST_BARRIER_WAIT_SUCCESS(pid1),
 148         ({ /* parent */
 149                 set_alarm(BASE_TIME * 4); /* longer than the child's initial sleep of BASE_TIME */
 150                 assert_se(barrier_place(&b));
 151                 assert_se(barrier_sync(&b));
 152         }),
 153         TEST_BARRIER_WAIT_SUCCESS(pid2));
 154
 155 /*
 156  * Test wait_next() multiple times
 157  * This places two barriers in the parent and waits for the child to exit. The
 158  * child sleeps for BASE_TIME so both barriers _should_ be in place. It then waits
 159  * for the parent to place the next barrier twice. The first call will fetch both
 160  * barriers and return. However, the second call will stall as the parent does
 161  * not place a 3rd barrier (the sleep caught two barriers). wait_next() does
 162  * not look at barrier-links so this stall is expected. Thus the child times
 163  * out and is killed by its alarm.
 164  */
 165 TEST_BARRIER(test_barrier_wait_next_twice,
 166         ({ /* child: expected to be terminated by SIGALRM */
 167                 sleep_for(BASE_TIME); /* let the parent place both barriers */
 168                 set_alarm(BASE_TIME);
 169                 assert_se(barrier_wait_next(&b));
 170                 assert_se(barrier_wait_next(&b)); /* expected to stall until the alarm fires */
 171                 assert_se(0); /* must not be reached */
 172         }),
 173         TEST_BARRIER_WAIT_ALARM(pid1),
 174         ({ /* parent */
 175                 set_alarm(BASE_TIME * 10);
 176                 assert_se(barrier_place(&b));
 177                 assert_se(barrier_place(&b));
 178                 sleep_for(BASE_TIME * 4); /* stay alive past the child's alarm (due after about BASE_TIME * 2) */
 179         }),
 180         TEST_BARRIER_WAIT_SUCCESS(pid2));
 181
 182 /*
 183  * Test wait_next() with local barriers
 184  * This is the same as test_barrier_wait_next_twice, but places local barriers
 185  * between both waits. This does not have any effect on the wait so the child
 186  * times out like in the other test.
 187  */
 188 TEST_BARRIER(test_barrier_wait_next_twice_local,
 189         ({ /* child: expected to be terminated by SIGALRM */
 190                 sleep_for(BASE_TIME); /* let the parent place both barriers */
 191                 set_alarm(BASE_TIME);
 192                 assert_se(barrier_wait_next(&b));
 193                 assert_se(barrier_place(&b)); /* local barriers; they must not satisfy the next wait */
 194                 assert_se(barrier_place(&b));
 195                 assert_se(barrier_wait_next(&b)); /* expected to stall until the alarm fires */
 196                 assert_se(0); /* must not be reached */
 197         }),
 198         TEST_BARRIER_WAIT_ALARM(pid1),
 199         ({ /* parent */
 200                 set_alarm(BASE_TIME * 10);
 201                 assert_se(barrier_place(&b));
 202                 assert_se(barrier_place(&b));
 203                 sleep_for(BASE_TIME * 4); /* stay alive past the child's alarm (due after about BASE_TIME * 2) */
 204         }),
 205         TEST_BARRIER_WAIT_SUCCESS(pid2));
 206
 207 /*
 208  * Test wait_next() with sync_next()
 209  * This is again the same as test_barrier_wait_next_twice but uses a
 210  * synced wait as the second wait. This works just fine because the local state
 211  * has no barriers placed; therefore, the remote is always in sync.
 212  */
 213 TEST_BARRIER(test_barrier_wait_next_twice_sync,
 214         ({ /* child: must finish before its alarm fires */
 215                 sleep_for(BASE_TIME); /* let the parent place both barriers */
 216                 set_alarm(BASE_TIME);
 217                 assert_se(barrier_wait_next(&b));
 218                 assert_se(barrier_sync_next(&b)); /* must return instead of stalling */
 219         }),
 220         TEST_BARRIER_WAIT_SUCCESS(pid1),
 221         ({ /* parent: places two barriers and exits */
 222                 set_alarm(BASE_TIME * 10);
 223                 assert_se(barrier_place(&b));
 224                 assert_se(barrier_place(&b));
 225         }),
 226         TEST_BARRIER_WAIT_SUCCESS(pid2));
 227
 228 /*
 229  * Test wait_next() with sync_next() and local barriers
 230  * This is again the same as test_barrier_wait_next_twice_local but uses a
 231  * synced wait as the second wait. This works just fine because the local state
 232  * (two barriers placed) is in sync with the remote (two barriers placed).
 233  */
 234 TEST_BARRIER(test_barrier_wait_next_twice_local_sync,
 235         ({ /* child: must finish before its alarm fires */
 236                 sleep_for(BASE_TIME); /* let the parent place both barriers */
 237                 set_alarm(BASE_TIME);
 238                 assert_se(barrier_wait_next(&b));
 239                 assert_se(barrier_place(&b));
 240                 assert_se(barrier_place(&b));
 241                 assert_se(barrier_sync_next(&b)); /* must return instead of stalling */
 242         }),
 243         TEST_BARRIER_WAIT_SUCCESS(pid1),
 244         ({ /* parent: places two barriers and exits */
 245                 set_alarm(BASE_TIME * 10);
 246                 assert_se(barrier_place(&b));
 247                 assert_se(barrier_place(&b));
 248         }),
 249         TEST_BARRIER_WAIT_SUCCESS(pid2));
 250
 251 /*
 252  * Test sync_next() and sync()
 253  * This tests sync_*() synchronizations and makes sure they work fine if the
 254  * local state is behind the remote state. Both processes must exit normally.
 255  */
 256 TEST_BARRIER(test_barrier_sync_next,
 257         ({ /* child */
 258                 set_alarm(BASE_TIME * 10);
 259                 assert_se(barrier_sync_next(&b)); /* nothing placed locally yet */
 260                 assert_se(barrier_sync(&b));
 261                 assert_se(barrier_place(&b));
 262                 assert_se(barrier_place(&b));
 263                 assert_se(barrier_sync_next(&b));
 264                 assert_se(barrier_sync_next(&b));
 265                 assert_se(barrier_sync(&b));
 266         }),
 267         TEST_BARRIER_WAIT_SUCCESS(pid1),
 268         ({ /* parent */
 269                 set_alarm(BASE_TIME * 10);
 270                 sleep_for(BASE_TIME); /* delay the parent's barriers by BASE_TIME */
 271                 assert_se(barrier_place(&b));
 272                 assert_se(barrier_place(&b));
 273                 assert_se(barrier_sync(&b));
 274         }),
 275         TEST_BARRIER_WAIT_SUCCESS(pid2));
 276
 277 /*
 278  * Test sync_next() and sync() with local barriers
 279  * This tests that sync_*() times out if local barriers are placed but the
 280  * remote didn't place any.
 281  */
 282 TEST_BARRIER(test_barrier_sync_next_local,
 283         ({ /* child: expected to be terminated by SIGALRM */
 284                 set_alarm(BASE_TIME);
 285                 assert_se(barrier_place(&b));
 286                 assert_se(barrier_sync_next(&b)); /* expected to stall until the alarm fires */
 287                 assert_se(0); /* must not be reached */
 288         }),
 289         TEST_BARRIER_WAIT_ALARM(pid1),
 290         ({ /* parent: places nothing */
 291                 sleep_for(BASE_TIME * 2); /* stay alive past the child's alarm (due after BASE_TIME) */
 292         }),
 293         TEST_BARRIER_WAIT_SUCCESS(pid2));
 294
 295 /*
 296  * Test sync_next() and sync() with local barriers and abortion
 297  * This is the same as test_barrier_sync_next_local but aborts the barrier in the
 298  * parent. Therefore, sync_next() returns false instead of stalling until the alarm.
 299  */
 300 TEST_BARRIER(test_barrier_sync_next_local_abort,
 301         ({ /* child: must exit normally, well before its alarm */
 302                 set_alarm(BASE_TIME * 10);
 303                 assert_se(barrier_place(&b));
 304                 assert_se(!barrier_sync_next(&b)); /* false: the parent aborted */
 305         }),
 306         TEST_BARRIER_WAIT_SUCCESS(pid1),
 307         ({ /* parent */
 308                 assert_se(barrier_abort(&b));
 309         }),
 310         TEST_BARRIER_WAIT_SUCCESS(pid2));
 311
 312 /*
 313  * Test matched wait_abortion()
 314  * This runs wait_abortion() in the child while the parent aborts the barrier.
 315  */
 316 TEST_BARRIER(test_barrier_wait_abortion,
 317         ({ /* child: must see the abortion before its alarm fires */
 318                 set_alarm(BASE_TIME * 10);
 319                 assert_se(barrier_wait_abortion(&b));
 320         }),
 321         TEST_BARRIER_WAIT_SUCCESS(pid1),
 322         ({ /* parent */
 323                 assert_se(barrier_abort(&b));
 324         }),
 325         TEST_BARRIER_WAIT_SUCCESS(pid2));
 326
 327 /*
 328  * Test unmatched wait_abortion()
 329  * This runs wait_abortion() without any remote abortion going on. It thus must
 330  * time out, i.e. the child is killed by its alarm.
 331  */
 332 TEST_BARRIER(test_barrier_wait_abortion_unmatched,
 333         ({ /* child: expected to be terminated by SIGALRM */
 334                 set_alarm(BASE_TIME);
 335                 assert_se(barrier_wait_abortion(&b)); /* expected to stall until the alarm fires */
 336                 assert_se(0); /* must not be reached */
 337         }),
 338         TEST_BARRIER_WAIT_ALARM(pid1),
 339         ({ /* parent: never aborts */
 340                 sleep_for(BASE_TIME * 2); /* stay alive past the child's alarm (due after BASE_TIME) */
 341         }),
 342         TEST_BARRIER_WAIT_SUCCESS(pid2));
 343
 344 /*
 345  * Test matched wait_abortion() with local abortion
 346  * This runs wait_abortion() with local and remote abortion; it must return false.
 347  */
 348 TEST_BARRIER(test_barrier_wait_abortion_local,
 349         ({ /* child: aborts locally, then waits for the remote abortion */
 350                 set_alarm(BASE_TIME * 10);
 351                 assert_se(barrier_abort(&b));
 352                 assert_se(!barrier_wait_abortion(&b)); /* must return (false) before the alarm */
 353         }),
 354         TEST_BARRIER_WAIT_SUCCESS(pid1),
 355         ({ /* parent */
 356                 assert_se(barrier_abort(&b));
 357         }),
 358         TEST_BARRIER_WAIT_SUCCESS(pid2));
 359
 360 /*
 361  * Test unmatched wait_abortion() with local abortion
 362  * This runs wait_abortion() with only local abortion. This must time out.
 363  */
 364 TEST_BARRIER(test_barrier_wait_abortion_local_unmatched,
 365         ({ /* child: expected to be terminated by SIGALRM */
 366                 set_alarm(BASE_TIME);
 367                 assert_se(barrier_abort(&b));
 368                 assert_se(!barrier_wait_abortion(&b)); /* expected to stall until the alarm fires */
 369                 assert_se(0); /* must not be reached */
 370         }),
 371         TEST_BARRIER_WAIT_ALARM(pid1),
 372         ({ /* parent: never aborts */
 373                 sleep_for(BASE_TIME * 2); /* stay alive past the child's alarm (due after BASE_TIME) */
 374         }),
 375         TEST_BARRIER_WAIT_SUCCESS(pid2));
 376
 377 /*
 378  * Test child exit
 379  * Place a barrier and sync with the child. The child only exit()s, which should
 380  * cause an implicit abortion and wake the parent.
 381  */
 382 TEST_BARRIER(test_barrier_exit,
 383         ({ /* child: no code, so it goes straight to the exit(42) in TEST_BARRIER */
 384         }),
 385         TEST_BARRIER_WAIT_SUCCESS(pid1),
 386         ({ /* parent */
 387                 set_alarm(BASE_TIME * 10);
 388                 assert_se(barrier_place(&b));
 389                 assert_se(!barrier_sync(&b)); /* must return (false) before the alarm */
 390         }),
 391         TEST_BARRIER_WAIT_SUCCESS(pid2));
 392
 393 /*
 394  * Test child exit with sleep
 395  * Same as test_barrier_exit but verifies the test really works due to the
 396  * child-exit. We add a sleep_for() to the child, so the alarm in the parent
 397  * fires first and the parent times out.
 398  */
 399 TEST_BARRIER(test_barrier_no_exit,
 400         ({ /* child */
 401                 sleep_for(BASE_TIME * 2); /* exit only after the parent's alarm (due after BASE_TIME) */
 402         }),
 403         TEST_BARRIER_WAIT_SUCCESS(pid1),
 404         ({ /* parent: expected to be terminated by SIGALRM */
 405                 set_alarm(BASE_TIME);
 406                 assert_se(barrier_place(&b));
 407                 assert_se(!barrier_sync(&b)); /* expected to stall until the alarm fires */
 408         }),
 409         TEST_BARRIER_WAIT_ALARM(pid2));
 410
 411 /*
 412  * Test pending exit against sync
 413  * The parent places a barrier *and* exits. The sleep_for() in the child is meant
 414  * to ensure both are pending. However, our logic prefers pending barriers over
 415  * pending exit-abortions (unlike normal abortions), thus the wait_next() must
 416  * succeed, same for the sync_next() as our local barrier-count is smaller than
 417  * the remote. Once we place a barrier our count is equal, so the sync still
 418  * succeeds. Only if we place one more barrier, we're ahead of the remote, thus
 419  * we will fail due to HUP on the pipe.
 420  */
 421 TEST_BARRIER(test_barrier_pending_exit,
 422         ({ /* child */
 423                 set_alarm(BASE_TIME * 4);
 424                 sleep_for(BASE_TIME * 2); /* let the parent place its barrier and exit */
 425                 assert_se(barrier_wait_next(&b));
 426                 assert_se(barrier_sync_next(&b)); /* local count 0, remote count 1 */
 427                 assert_se(barrier_place(&b));
 428                 assert_se(barrier_sync_next(&b)); /* local count 1, remote count 1 */
 429                 assert_se(barrier_place(&b));
 430                 assert_se(!barrier_sync_next(&b)); /* local count 2, remote count 1: fails */
 431         }),
 432         TEST_BARRIER_WAIT_SUCCESS(pid1),
 433         ({ /* parent: places one barrier and exits */
 434                 assert_se(barrier_place(&b));
 435         }),
 436         TEST_BARRIER_WAIT_SUCCESS(pid2));
 437
 438 int main(int argc, char *argv[]) {
 439         static void (* const tests[])(void) = { /* executed in exactly this order */
 440                 test_barrier_sync,
 441                 test_barrier_wait_next,
 442                 test_barrier_wait_next_twice,
 443                 test_barrier_wait_next_twice_sync,
 444                 test_barrier_wait_next_twice_local,
 445                 test_barrier_wait_next_twice_local_sync,
 446                 test_barrier_sync_next,
 447                 test_barrier_sync_next_local,
 448                 test_barrier_sync_next_local_abort,
 449                 test_barrier_wait_abortion,
 450                 test_barrier_wait_abortion_unmatched,
 451                 test_barrier_wait_abortion_local,
 452                 test_barrier_wait_abortion_local_unmatched,
 453                 test_barrier_exit,
 454                 test_barrier_no_exit,
 455                 test_barrier_pending_exit,
 456         };
 457
 458         /* These tests depend on real-time alarms and sleeps and are therefore
 459          * fragile on a loaded system. To keep CI stable they are skipped unless
 460          * at least one command-line argument is given. */
 461         if (argc < 2)
 462                 return EXIT_TEST_SKIP;
 463
 464         log_parse_environment();
 465         log_open();
 466
 467         for (size_t i = 0; i < sizeof(tests) / sizeof(tests[0]); i++)
 468                 tests[i]();
 469         return 0;
 470 }