// SPDX-License-Identifier: GPL-2.0+
/*
 * Read-Copy Update module-based performance-test facility
 *
 * Copyright (C) IBM Corporation, 2015
 *
 * Authors: Paul E. McKenney <paulmck@linux.ibm.com>
 */

#define pr_fmt(fmt) fmt

#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/mm.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/err.h>
#include <linux/spinlock.h>
#include <linux/smp.h>
#include <linux/rcupdate.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <uapi/linux/sched/types.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/completion.h>
#include <linux/moduleparam.h>
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/reboot.h>
#include <linux/freezer.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/stat.h>
#include <linux/srcu.h>
#include <linux/slab.h>
#include <asm/byteorder.h>
#include <linux/torture.h>
#include <linux/vmalloc.h>

#include "rcu.h"

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>");

#define PERF_FLAG "-perf:"
#define PERFOUT_STRING(s) \
	pr_alert("%s" PERF_FLAG " %s\n", perf_type, s)
#define VERBOSE_PERFOUT_STRING(s) \
	do { if (verbose) pr_alert("%s" PERF_FLAG " %s\n", perf_type, s); } while (0)
#define VERBOSE_PERFOUT_ERRSTRING(s) \
	do { if (verbose) pr_alert("%s" PERF_FLAG "!!! %s\n", perf_type, s); } while (0)

/*
 * The intended use cases for the nreaders and nwriters module parameters
 * are as follows:
 *
 * 1. Specify only the nr_cpus kernel boot parameter.  This will
 *    set both nreaders and nwriters to the value specified by
 *    nr_cpus for a mixed reader/writer test.
 *
 * 2. Specify the nr_cpus kernel boot parameter, but set
 *    rcuperf.nreaders to zero.  This will set nwriters to the
 *    value specified by nr_cpus for an update-only test.
 *
 * 3. Specify the nr_cpus kernel boot parameter, but set
 *    rcuperf.nwriters to zero.  This will set nreaders to the
 *    value specified by nr_cpus for a read-only test.
 *
 * Various other use cases may of course be specified.
 */
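
/*
 * As a concrete sketch of case 3 above (boot-time values are illustrative,
 * not prescriptive), a read-only test on a 16-CPU system could be booted
 * with:
 *
 *	nr_cpus=16 rcuperf.nwriters=0
 */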

#ifdef MODULE
# define RCUPERF_SHUTDOWN 0
#else
# define RCUPERF_SHUTDOWN 1
#endif

torture_param(bool, gp_async, false, "Use asynchronous GP wait primitives");
torture_param(int, gp_async_max, 1000, "Max # outstanding waits per writer");
torture_param(bool, gp_exp, false, "Use expedited GP wait primitives");
torture_param(int, holdoff, 10, "Holdoff time before test start (s)");
torture_param(int, nreaders, -1, "Number of RCU reader threads");
torture_param(int, nwriters, -1, "Number of RCU updater threads");
torture_param(bool, shutdown, RCUPERF_SHUTDOWN,
	      "Shutdown at end of performance tests.");
torture_param(int, verbose, 1, "Enable verbose debugging printk()s");
torture_param(int, writer_holdoff, 0, "Holdoff (us) between GPs, zero to disable");
torture_param(int, kfree_rcu_test, 0, "Do we run a kfree_rcu() perf test?");

static char *perf_type = "rcu";
module_param(perf_type, charp, 0444);
MODULE_PARM_DESC(perf_type, "Type of RCU to performance-test (rcu, srcu, ...)");

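/*
 * The recognized perf_type values correspond to the ops tables defined
 * below: "rcu", "srcu", "srcud", and "tasks".  As a sketch, assuming a
 * modular (CONFIG_RCU_PERF_TEST=m) build producing a module named rcuperf,
 * an update-only SRCU test might be started with:
 *
 *	modprobe rcuperf perf_type=srcu nreaders=0
 */
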
static int nrealreaders;
static int nrealwriters;
static struct task_struct **writer_tasks;
static struct task_struct **reader_tasks;
static struct task_struct *shutdown_task;

static u64 **writer_durations;
static int *writer_n_durations;
static atomic_t n_rcu_perf_reader_started;
static atomic_t n_rcu_perf_writer_started;
static atomic_t n_rcu_perf_writer_finished;
static wait_queue_head_t shutdown_wq;
static u64 t_rcu_perf_writer_started;
static u64 t_rcu_perf_writer_finished;
static unsigned long b_rcu_gp_test_started;
static unsigned long b_rcu_gp_test_finished;
static DEFINE_PER_CPU(atomic_t, n_async_inflight);

#define MAX_MEAS 10000
#define MIN_MEAS 100
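/* Each writer records at least MIN_MEAS and at most MAX_MEAS grace-period
 * durations; see rcu_perf_writer(). */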

/*
 * Operations vector for selecting different types of tests.
 */

struct rcu_perf_ops {
	int ptype;
	void (*init)(void);
	void (*cleanup)(void);
	int (*readlock)(void);
	void (*readunlock)(int idx);
	unsigned long (*get_gp_seq)(void);
	unsigned long (*gp_diff)(unsigned long new, unsigned long old);
	unsigned long (*exp_completed)(void);
	void (*async)(struct rcu_head *head, rcu_callback_t func);
	void (*gp_barrier)(void);
	void (*sync)(void);
	void (*exp_sync)(void);
	const char *name;
};

static struct rcu_perf_ops *cur_ops;

/*
 * Definitions for rcu perf testing.
 */

static int rcu_perf_read_lock(void) __acquires(RCU)
{
	rcu_read_lock();
	return 0;
}

static void rcu_perf_read_unlock(int idx) __releases(RCU)
{
	rcu_read_unlock();
}

static unsigned long __maybe_unused rcu_no_completed(void)
{
	return 0;
}

static void rcu_sync_perf_init(void)
{
}

static struct rcu_perf_ops rcu_ops = {
	.ptype		= RCU_FLAVOR,
	.init		= rcu_sync_perf_init,
	.readlock	= rcu_perf_read_lock,
	.readunlock	= rcu_perf_read_unlock,
	.get_gp_seq	= rcu_get_gp_seq,
	.gp_diff	= rcu_seq_diff,
	.exp_completed	= rcu_exp_batches_completed,
	.async		= call_rcu,
	.gp_barrier	= rcu_barrier,
	.sync		= synchronize_rcu,
	.exp_sync	= synchronize_rcu_expedited,
	.name		= "rcu"
};
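
/*
 * With perf_type=rcu (the default), cur_ops->sync() thus resolves to
 * synchronize_rcu() and cur_ops->exp_sync() to synchronize_rcu_expedited(),
 * so each pass through rcu_perf_writer()'s loop times exactly one normal
 * or one expedited RCU grace period.
 */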

/*
 * Definitions for srcu perf testing.
 */

DEFINE_STATIC_SRCU(srcu_ctl_perf);
static struct srcu_struct *srcu_ctlp = &srcu_ctl_perf;

static int srcu_perf_read_lock(void) __acquires(srcu_ctlp)
{
	return srcu_read_lock(srcu_ctlp);
}

static void srcu_perf_read_unlock(int idx) __releases(srcu_ctlp)
{
	srcu_read_unlock(srcu_ctlp, idx);
}

static unsigned long srcu_perf_completed(void)
{
	return srcu_batches_completed(srcu_ctlp);
}

static void srcu_call_rcu(struct rcu_head *head, rcu_callback_t func)
{
	call_srcu(srcu_ctlp, head, func);
}

static void srcu_rcu_barrier(void)
{
	srcu_barrier(srcu_ctlp);
}

static void srcu_perf_synchronize(void)
{
	synchronize_srcu(srcu_ctlp);
}

static void srcu_perf_synchronize_expedited(void)
{
	synchronize_srcu_expedited(srcu_ctlp);
}

static struct rcu_perf_ops srcu_ops = {
	.ptype		= SRCU_FLAVOR,
	.init		= rcu_sync_perf_init,
	.readlock	= srcu_perf_read_lock,
	.readunlock	= srcu_perf_read_unlock,
	.get_gp_seq	= srcu_perf_completed,
	.gp_diff	= rcu_seq_diff,
	.exp_completed	= srcu_perf_completed,
	.async		= srcu_call_rcu,
	.gp_barrier	= srcu_rcu_barrier,
	.sync		= srcu_perf_synchronize,
	.exp_sync	= srcu_perf_synchronize_expedited,
	.name		= "srcu"
};

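/*
 * Definitions for srcud perf testing, which exercises a dynamically
 * allocated and initialized srcu_struct rather than the static
 * srcu_ctl_perf above.
 */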
static struct srcu_struct srcud;

static void srcu_sync_perf_init(void)
{
	srcu_ctlp = &srcud;
	init_srcu_struct(srcu_ctlp);
}

static void srcu_sync_perf_cleanup(void)
{
	cleanup_srcu_struct(srcu_ctlp);
}

static struct rcu_perf_ops srcud_ops = {
	.ptype		= SRCU_FLAVOR,
	.init		= srcu_sync_perf_init,
	.cleanup	= srcu_sync_perf_cleanup,
	.readlock	= srcu_perf_read_lock,
	.readunlock	= srcu_perf_read_unlock,
	.get_gp_seq	= srcu_perf_completed,
	.gp_diff	= rcu_seq_diff,
	.exp_completed	= srcu_perf_completed,
	.async		= srcu_call_rcu,
	.gp_barrier	= srcu_rcu_barrier,
	.sync		= srcu_perf_synchronize,
	.exp_sync	= srcu_perf_synchronize_expedited,
	.name		= "srcud"
};

/*
 * Definitions for RCU-tasks perf testing.
 */

static int tasks_perf_read_lock(void)
{
	return 0;
}

static void tasks_perf_read_unlock(int idx)
{
}

static struct rcu_perf_ops tasks_ops = {
	.ptype		= RCU_TASKS_FLAVOR,
	.init		= rcu_sync_perf_init,
	.readlock	= tasks_perf_read_lock,
	.readunlock	= tasks_perf_read_unlock,
	.get_gp_seq	= rcu_no_completed,
	.gp_diff	= rcu_seq_diff,
	.async		= call_rcu_tasks,
	.gp_barrier	= rcu_barrier_tasks,
	.sync		= synchronize_rcu_tasks,
	.exp_sync	= synchronize_rcu_tasks,
	.name		= "tasks"
};

static unsigned long rcuperf_seq_diff(unsigned long new, unsigned long old)
{
	if (!cur_ops->gp_diff)
		return new - old;
	return cur_ops->gp_diff(new, old);
}
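
/*
 * Note that all of the ops vectors above supply ->gp_diff = rcu_seq_diff,
 * which (unlike the raw-subtraction fallback) is believed to account for
 * the low-order state bits embedded in the rcu_seq grace-period counters.
 * The fallback exists for any future ops table that leaves ->gp_diff NULL.
 */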

/*
 * If performance tests complete, wait for shutdown to commence.
 */
static void rcu_perf_wait_shutdown(void)
{
	cond_resched_tasks_rcu_qs();
	if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
		return;
	while (!torture_must_stop())
		schedule_timeout_uninterruptible(1);
}

/*
 * RCU perf reader kthread.  Repeatedly does an empty RCU read-side
 * critical section, minimizing update-side interference.
 */
static int
rcu_perf_reader(void *arg)
{
	unsigned long flags;
	int idx;
	long me = (long)arg;

	VERBOSE_PERFOUT_STRING("rcu_perf_reader task started");
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);
	atomic_inc(&n_rcu_perf_reader_started);

	do {
		local_irq_save(flags);
		idx = cur_ops->readlock();
		cur_ops->readunlock(idx);
		local_irq_restore(flags);
		rcu_perf_wait_shutdown();
	} while (!torture_must_stop());
	torture_kthread_stopping("rcu_perf_reader");
	return 0;
}

/*
 * Callback function for asynchronous grace periods from rcu_perf_writer().
 */
static void rcu_perf_async_cb(struct rcu_head *rhp)
{
	atomic_dec(this_cpu_ptr(&n_async_inflight));
	kfree(rhp);
}

/*
 * RCU perf writer kthread.  Repeatedly does a grace period.
 */
static int
rcu_perf_writer(void *arg)
{
	int i = 0;
	int i_max;
	long me = (long)arg;
	struct rcu_head *rhp = NULL;
	struct sched_param sp;
	bool started = false, done = false, alldone = false;
	u64 t;
	u64 *wdp;
	u64 *wdpp = writer_durations[me];

	VERBOSE_PERFOUT_STRING("rcu_perf_writer task started");
	WARN_ON(!wdpp);
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	sp.sched_priority = 1;
	sched_setscheduler_nocheck(current, SCHED_FIFO, &sp);

	if (holdoff)
		schedule_timeout_uninterruptible(holdoff * HZ);

	/*
	 * Wait until rcu_end_inkernel_boot() is called for normal GP tests
	 * so that RCU is not always expedited for normal GP tests.
	 * The system_state test is approximate, but works well in practice.
	 */
	while (!gp_exp && system_state != SYSTEM_RUNNING)
		schedule_timeout_uninterruptible(1);

	t = ktime_get_mono_fast_ns();
	if (atomic_inc_return(&n_rcu_perf_writer_started) >= nrealwriters) {
		t_rcu_perf_writer_started = t;
		if (gp_exp) {
			b_rcu_gp_test_started =
				cur_ops->exp_completed() / 2;
		} else {
			b_rcu_gp_test_started = cur_ops->get_gp_seq();
		}
	}

	do {
		if (writer_holdoff)
			udelay(writer_holdoff);
		wdp = &wdpp[i];
		*wdp = ktime_get_mono_fast_ns();
		if (gp_async) {
retry:
			if (!rhp)
				rhp = kmalloc(sizeof(*rhp), GFP_KERNEL);
			if (rhp && atomic_read(this_cpu_ptr(&n_async_inflight)) < gp_async_max) {
				atomic_inc(this_cpu_ptr(&n_async_inflight));
				cur_ops->async(rhp, rcu_perf_async_cb);
				rhp = NULL;
			} else if (!kthread_should_stop()) {
				cur_ops->gp_barrier();
				goto retry;
			} else {
				kfree(rhp); /* Because we are stopping. */
			}
		} else if (gp_exp) {
			cur_ops->exp_sync();
		} else {
			cur_ops->sync();
		}
		t = ktime_get_mono_fast_ns();
		*wdp = t - *wdp;
		i_max = i;
		if (!started &&
		    atomic_read(&n_rcu_perf_writer_started) >= nrealwriters)
			started = true;
		if (!done && i >= MIN_MEAS) {
			done = true;
			sp.sched_priority = 0;
			sched_setscheduler_nocheck(current,
						   SCHED_NORMAL, &sp);
			pr_alert("%s%s rcu_perf_writer %ld has %d measurements\n",
				 perf_type, PERF_FLAG, me, MIN_MEAS);
			if (atomic_inc_return(&n_rcu_perf_writer_finished) >=
			    nrealwriters) {
				schedule_timeout_interruptible(10);
				rcu_ftrace_dump(DUMP_ALL);
				PERFOUT_STRING("Test complete");
				t_rcu_perf_writer_finished = t;
				if (gp_exp) {
					b_rcu_gp_test_finished =
						cur_ops->exp_completed() / 2;
				} else {
					b_rcu_gp_test_finished =
						cur_ops->get_gp_seq();
				}
				if (shutdown) {
					smp_mb(); /* Assign before wake. */
					wake_up(&shutdown_wq);
				}
			}
		}
		if (done && !alldone &&
		    atomic_read(&n_rcu_perf_writer_finished) >= nrealwriters)
			alldone = true;
		if (started && !alldone && i < MAX_MEAS - 1)
			i++;
		rcu_perf_wait_shutdown();
	} while (!torture_must_stop());
	if (gp_async) {
		cur_ops->gp_barrier();
	}
	writer_n_durations[me] = i_max;
	torture_kthread_stopping("rcu_perf_writer");
	return 0;
}

static void
rcu_perf_print_module_parms(struct rcu_perf_ops *cur_ops, const char *tag)
{
	pr_alert("%s" PERF_FLAG
		 "--- %s: nreaders=%d nwriters=%d verbose=%d shutdown=%d\n",
		 perf_type, tag, nrealreaders, nrealwriters, verbose, shutdown);
}
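
/*
 * Given the format string above, the resulting console line looks like
 * this (values illustrative):
 *
 *	rcu-perf:--- Start of test: nreaders=8 nwriters=8 verbose=1 shutdown=1
 */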

static void
rcu_perf_cleanup(void)
{
	int i;
	int j;
	int ngps = 0;
	u64 *wdp;
	u64 *wdpp;

	/*
	 * Would like warning at start, but everything is expedited
	 * during the mid-boot phase, so have to wait till the end.
	 */
	if (rcu_gp_is_expedited() && !rcu_gp_is_normal() && !gp_exp)
		VERBOSE_PERFOUT_ERRSTRING("All grace periods expedited, no normal ones to measure!");
	if (rcu_gp_is_normal() && gp_exp)
		VERBOSE_PERFOUT_ERRSTRING("All grace periods normal, no expedited ones to measure!");
	if (gp_exp && gp_async)
		VERBOSE_PERFOUT_ERRSTRING("No expedited async GPs, so went with async!");

	if (torture_cleanup_begin())
		return;
	if (!cur_ops) {
		torture_cleanup_end();
		return;
	}

	if (reader_tasks) {
		for (i = 0; i < nrealreaders; i++)
			torture_stop_kthread(rcu_perf_reader,
					     reader_tasks[i]);
		kfree(reader_tasks);
	}

	if (writer_tasks) {
		for (i = 0; i < nrealwriters; i++) {
			torture_stop_kthread(rcu_perf_writer,
					     writer_tasks[i]);
			if (!writer_n_durations)
				continue;
			j = writer_n_durations[i];
			pr_alert("%s%s writer %d gps: %d\n",
				 perf_type, PERF_FLAG, i, j);
			ngps += j;
		}
		pr_alert("%s%s start: %llu end: %llu duration: %llu gps: %d batches: %ld\n",
			 perf_type, PERF_FLAG,
			 t_rcu_perf_writer_started, t_rcu_perf_writer_finished,
			 t_rcu_perf_writer_finished -
			 t_rcu_perf_writer_started,
			 ngps,
			 rcuperf_seq_diff(b_rcu_gp_test_finished,
					  b_rcu_gp_test_started));
		for (i = 0; i < nrealwriters; i++) {
			if (!writer_durations)
				break;
			if (!writer_n_durations)
				continue;
			wdpp = writer_durations[i];
			if (!wdpp)
				continue;
			for (j = 0; j <= writer_n_durations[i]; j++) {
				wdp = &wdpp[j];
				pr_alert("%s%s %4d writer-duration: %5d %llu\n",
					 perf_type, PERF_FLAG,
					 i, j, *wdp);
				if (j % 100 == 0)
					schedule_timeout_uninterruptible(1);
			}
			kfree(writer_durations[i]);
		}
		kfree(writer_tasks);
		kfree(writer_durations);
		kfree(writer_n_durations);
	}

	/* Do torture-type-specific cleanup operations. */
	if (cur_ops->cleanup != NULL)
		cur_ops->cleanup();

	torture_cleanup_end();
}

/*
 * Return the number if non-negative.  If -1, the number of CPUs.
 * If less than -1, that much less than the number of CPUs, but
 * at least one.
 */
static int compute_real(int n)
{
	int nr;

	if (n >= 0) {
		nr = n;
	} else {
		nr = num_online_cpus() + 1 + n;
		if (nr <= 0)
			nr = 1;
	}
	return nr;
}
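
/*
 * For example, with 8 CPUs online: compute_real(4) returns 4,
 * compute_real(-1) returns 8 + 1 - 1 = 8, and compute_real(-2)
 * returns 8 + 1 - 2 = 7.
 */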

/*
 * RCU perf shutdown kthread.  Just waits to be awakened, then shuts
 * down the system.
 */
static int
rcu_perf_shutdown(void *arg)
{
	do {
		wait_event(shutdown_wq,
			   atomic_read(&n_rcu_perf_writer_finished) >=
			   nrealwriters);
	} while (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters);
	smp_mb(); /* Wake before output. */
	rcu_perf_cleanup();
	kernel_power_off();
	return -EINVAL;
}

/*
 * kfree_rcu() performance tests: Start a kfree_rcu() loop on all CPUs for a
 * given number of iterations and measure the total time and the number of
 * grace periods needed for all iterations to complete.
 */

torture_param(int, kfree_nthreads, -1, "Number of threads running loops of kfree_rcu().");
torture_param(int, kfree_alloc_num, 8000, "Number of allocations and frees done in an iteration.");
torture_param(int, kfree_loops, 10, "Number of loops doing kfree_alloc_num allocations and frees.");

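/*
 * As a sketch (parameter values illustrative), a kfree_rcu() test might be
 * booted with:
 *
 *	rcuperf.kfree_rcu_test=1 rcuperf.kfree_nthreads=8 rcuperf.kfree_loops=20
 */
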
static struct task_struct **kfree_reader_tasks;
static int kfree_nrealthreads;
static atomic_t n_kfree_perf_thread_started;
static atomic_t n_kfree_perf_thread_ended;

struct kfree_obj {
	char kfree_obj[8];
	struct rcu_head rh;
};
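
/*
 * On typical 64-bit builds this object is 24 bytes: the 8-byte payload
 * plus the two-pointer struct rcu_head consumed by kfree_rcu().
 */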

static int
kfree_perf_thread(void *arg)
{
	int i, loop = 0;
	long me = (long)arg;
	struct kfree_obj *alloc_ptr;
	u64 start_time, end_time;
	long long mem_begin, mem_during = 0;

	VERBOSE_PERFOUT_STRING("kfree_perf_thread task started");
	set_cpus_allowed_ptr(current, cpumask_of(me % nr_cpu_ids));
	set_user_nice(current, MAX_NICE);

	start_time = ktime_get_mono_fast_ns();

	if (atomic_inc_return(&n_kfree_perf_thread_started) >= kfree_nrealthreads) {
		if (gp_exp)
			b_rcu_gp_test_started = cur_ops->exp_completed() / 2;
		else
			b_rcu_gp_test_started = cur_ops->get_gp_seq();
	}

	do {
		if (!mem_during) {
			mem_during = mem_begin = si_mem_available();
		} else if (loop % (kfree_loops / 4) == 0) {
			mem_during = (mem_during + si_mem_available()) / 2;
		}

		for (i = 0; i < kfree_alloc_num; i++) {
			alloc_ptr = kmalloc(sizeof(struct kfree_obj), GFP_KERNEL);
			if (!alloc_ptr)
				return -ENOMEM;

			kfree_rcu(alloc_ptr, rh);
		}

		cond_resched();
	} while (!torture_must_stop() && ++loop < kfree_loops);

	if (atomic_inc_return(&n_kfree_perf_thread_ended) >= kfree_nrealthreads) {
		end_time = ktime_get_mono_fast_ns();

		if (gp_exp)
			b_rcu_gp_test_finished = cur_ops->exp_completed() / 2;
		else
			b_rcu_gp_test_finished = cur_ops->get_gp_seq();

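		/*
		 * si_mem_available() reports pages, so the shift below
		 * converts the page-count delta to MB: with 4-KiB pages
		 * (PAGE_SHIFT == 12), 2^(20 - 12) = 256 pages per MB.
		 */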
		pr_alert("Total time taken by all kfree'ers: %llu ns, loops: %d, batches: %ld, memory footprint: %lldMB\n",
			 (unsigned long long)(end_time - start_time), kfree_loops,
			 rcuperf_seq_diff(b_rcu_gp_test_finished, b_rcu_gp_test_started),
			 (mem_begin - mem_during) >> (20 - PAGE_SHIFT));

		if (shutdown) {
			smp_mb(); /* Assign before wake. */
			wake_up(&shutdown_wq);
		}
	}

	torture_kthread_stopping("kfree_perf_thread");
	return 0;
}

static void
kfree_perf_cleanup(void)
{
	int i;

	if (torture_cleanup_begin())
		return;

	if (kfree_reader_tasks) {
		for (i = 0; i < kfree_nrealthreads; i++)
			torture_stop_kthread(kfree_perf_thread,
					     kfree_reader_tasks[i]);
		kfree(kfree_reader_tasks);
	}

	torture_cleanup_end();
}

/*
 * Shutdown kthread.  Just waits to be awakened, then shuts down the system.
 */
static int
kfree_perf_shutdown(void *arg)
{
	do {
		wait_event(shutdown_wq,
			   atomic_read(&n_kfree_perf_thread_ended) >=
			   kfree_nrealthreads);
	} while (atomic_read(&n_kfree_perf_thread_ended) < kfree_nrealthreads);

	smp_mb(); /* Wake before output. */

	kfree_perf_cleanup();
	kernel_power_off();
	return -EINVAL;
}

static int __init
kfree_perf_init(void)
{
	long i;
	int firsterr = 0;

	kfree_nrealthreads = compute_real(kfree_nthreads);
	/* Start up the kthreads. */
	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(kfree_perf_shutdown, NULL,
						  shutdown_task);
		if (firsterr)
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}

	kfree_reader_tasks = kcalloc(kfree_nrealthreads, sizeof(kfree_reader_tasks[0]),
				     GFP_KERNEL);
	if (kfree_reader_tasks == NULL) {
		firsterr = -ENOMEM;
		goto unwind;
	}

	for (i = 0; i < kfree_nrealthreads; i++) {
		firsterr = torture_create_kthread(kfree_perf_thread, (void *)i,
						  kfree_reader_tasks[i]);
		if (firsterr)
			goto unwind;
	}

	while (atomic_read(&n_kfree_perf_thread_started) < kfree_nrealthreads)
		schedule_timeout_uninterruptible(1);

	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	kfree_perf_cleanup();
	return firsterr;
}

static int __init
rcu_perf_init(void)
{
	long i;
	int firsterr = 0;
	static struct rcu_perf_ops *perf_ops[] = {
		&rcu_ops, &srcu_ops, &srcud_ops, &tasks_ops,
	};

	if (!torture_init_begin(perf_type, verbose))
		return -EBUSY;

	/* Process args and tell the world that the perf'er is on the job. */
	for (i = 0; i < ARRAY_SIZE(perf_ops); i++) {
		cur_ops = perf_ops[i];
		if (strcmp(perf_type, cur_ops->name) == 0)
			break;
	}
	if (i == ARRAY_SIZE(perf_ops)) {
		pr_alert("rcu-perf: invalid perf type: \"%s\"\n", perf_type);
		pr_alert("rcu-perf types:");
		for (i = 0; i < ARRAY_SIZE(perf_ops); i++)
			pr_cont(" %s", perf_ops[i]->name);
		pr_cont("\n");
		WARN_ON(!IS_MODULE(CONFIG_RCU_PERF_TEST));
		firsterr = -EINVAL;
		cur_ops = NULL;
		goto unwind;
	}
	if (cur_ops->init)
		cur_ops->init();

	if (kfree_rcu_test)
		return kfree_perf_init();

	nrealwriters = compute_real(nwriters);
	nrealreaders = compute_real(nreaders);
	atomic_set(&n_rcu_perf_reader_started, 0);
	atomic_set(&n_rcu_perf_writer_started, 0);
	atomic_set(&n_rcu_perf_writer_finished, 0);
	rcu_perf_print_module_parms(cur_ops, "Start of test");

	/* Start up the kthreads. */

	if (shutdown) {
		init_waitqueue_head(&shutdown_wq);
		firsterr = torture_create_kthread(rcu_perf_shutdown, NULL,
						  shutdown_task);
		if (firsterr)
			goto unwind;
		schedule_timeout_uninterruptible(1);
	}
	reader_tasks = kcalloc(nrealreaders, sizeof(reader_tasks[0]),
			       GFP_KERNEL);
	if (reader_tasks == NULL) {
		VERBOSE_PERFOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}
	for (i = 0; i < nrealreaders; i++) {
		firsterr = torture_create_kthread(rcu_perf_reader, (void *)i,
						  reader_tasks[i]);
		if (firsterr)
			goto unwind;
	}
	while (atomic_read(&n_rcu_perf_reader_started) < nrealreaders)
		schedule_timeout_uninterruptible(1);
	writer_tasks = kcalloc(nrealwriters, sizeof(writer_tasks[0]),
			       GFP_KERNEL);
	writer_durations = kcalloc(nrealwriters, sizeof(*writer_durations),
				   GFP_KERNEL);
	writer_n_durations =
		kcalloc(nrealwriters, sizeof(*writer_n_durations),
			GFP_KERNEL);
	if (!writer_tasks || !writer_durations || !writer_n_durations) {
		VERBOSE_PERFOUT_ERRSTRING("out of memory");
		firsterr = -ENOMEM;
		goto unwind;
	}
	for (i = 0; i < nrealwriters; i++) {
		writer_durations[i] =
			kcalloc(MAX_MEAS, sizeof(*writer_durations[i]),
				GFP_KERNEL);
		if (!writer_durations[i]) {
			firsterr = -ENOMEM;
			goto unwind;
		}
		firsterr = torture_create_kthread(rcu_perf_writer, (void *)i,
						  writer_tasks[i]);
		if (firsterr)
			goto unwind;
	}
	torture_init_end();
	return 0;

unwind:
	torture_init_end();
	rcu_perf_cleanup();
	return firsterr;
}

module_init(rcu_perf_init);
module_exit(rcu_perf_cleanup);