]> git.ipfire.org Git - thirdparty/glibc.git/blame - nptl/perf.c
Prefer https to http for gnu.org and fsf.org URLs
[thirdparty/glibc.git] / nptl / perf.c
CommitLineData
04277e02 1/* Copyright (C) 2002-2019 Free Software Foundation, Inc.
76a50749
UD
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
59ba27a6 16 License along with the GNU C Library; if not, see
5a82c748 17 <https://www.gnu.org/licenses/>. */
76a50749
UD
18
19#define _GNU_SOURCE 1
20#include <argp.h>
21#include <error.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <limits.h>
26#include <pthread.h>
27#include <signal.h>
28#include <stdbool.h>
29#include <stdlib.h>
30#include <string.h>
31#include <time.h>
32#include <unistd.h>
33#include <sys/param.h>
34#include <sys/types.h>
35
/* Largest value accepted by the thread-count options.  May be overridden
   on the compiler command line.  */
#if !defined MAX_THREADS
# define MAX_THREADS 100000
#endif

/* Initial value of the number of threads used at once.  May be overridden
   on the compiler command line.  */
#if !defined DEFAULT_THREADS
# define DEFAULT_THREADS 50
#endif
42
43
/* Keys of the options which only have a long name.  The values lie
   outside the range of printable characters so argp does not treat them
   as short option letters.  */
#define OPT_TO_THREAD 300
#define OPT_TO_PROCESS 301
#define OPT_SYNC_SIGNAL 302
#define OPT_SYNC_JOIN 303
#define OPT_TOPLEVEL 304


/* Option table handed to argp.  Entries without a name and key are
   group headers whose text is printed in the --help output; the table
   is terminated by an all-zero entry.  */
static const struct argp_option options[] =
  {
    { NULL, 0, NULL, 0,
      "This is a test for threads so we allow the user to select the number "
      "of threads which are used at any one time. Independently the total "
      "number of rounds can be selected. This is the total number of "
      "threads which will have run when the process terminates:" },
    { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
    { "starts", 's', "NUMBER", 0, "Total number of working threads" },
    { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
      "Number of toplevel threads which start the other threads; this "
      "implies --sync-join" },

    { NULL, 0, NULL, 0,
      "Each thread can do one of two things: sleep or do work. The latter "
      "is 100% CPU bound. The work load is the probability a thread does "
      "work. All values from zero to 100 (inclusive) are valid. How often "
      "each thread repeats this can be determined by the number of rounds. "
      "The work cost determines how long each work session (not sleeping) "
      "takes. If it is zero a thread would effectively do nothing. By "
      "setting the number of rounds to zero the thread does no work at all "
      "and pure thread creation times can be measured." },
    { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
    { "workcost", 'c', "NUMBER", 0,
      "Factor in the cost of each round of working" },
    { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },

    { NULL, 0, NULL, 0,
      "There are a number of different methods how thread creation can be "
      "synchronized. Synchronization is necessary since the number of "
      "concurrently running threads is limited." },
    { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
      "Synchronize using a signal (default)" },
    { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },

    { NULL, 0, NULL, 0,
      "One parameter for each thread's execution is the size of the stack. "
      "If this parameter is not used the system's default stack size is "
      "used. If many threads are used the stack size should be chosen "
      "quite small." },
    { "stacksize", 'S', "BYTES", 0, "Size of each thread's stack" },
    { "guardsize", 'g', "BYTES", 0,
      "Size of stack guard area; must fit into the stack" },

    { NULL, 0, NULL, 0, "Signal options:" },
    { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
    { "to-process", OPT_TO_PROCESS, NULL, 0,
      "Send signal to process (default)" },

    { NULL, 0, NULL, 0, "Administrative options:" },
    { "progress", 'p', NULL, 0, "Show signs of progress" },
    { "timing", 'T', NULL, 0,
      "Measure time from startup to the last thread finishing" },
    { NULL, 0, NULL, 0, NULL }
  };
104
105/* Prototype for option handler. */
106static error_t parse_opt (int key, char *arg, struct argp_state *state);
107
108/* Data structure to communicate with argp functions. */
109static struct argp argp =
110{
111 options, parse_opt
112};
113
114
115static unsigned long int threads = DEFAULT_THREADS;
116static unsigned long int workload = 75;
117static unsigned long int workcost = 20;
118static unsigned long int rounds = 10;
119static long int starts = 5000;
120static unsigned long int stacksize;
121static long int guardsize = -1;
122static bool progress;
123static bool timing;
124static bool to_thread;
125static unsigned long int toplevel = 1;
126
127
128static long int running;
129static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
130
131static pid_t pid;
132static pthread_t tmain;
133
134static clockid_t cl;
135static struct timespec start_time;
136
137
138static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
139unsigned int sum;
140
141static enum
142 {
143 sync_signal,
144 sync_join
145 }
146sync_method;
147
148
149/* We use 64bit values for the times. */
150typedef unsigned long long int hp_timing_t;
151
152
153/* Attributes for all created threads. */
154static pthread_attr_t attr;
155
156
157static void *
158work (void *arg)
159{
160 unsigned long int i;
161 unsigned int state = (unsigned long int) arg;
162
163 for (i = 0; i < rounds; ++i)
164 {
165 /* Determine what to do. */
166 unsigned int rnum;
167
00a13bbd 168 /* Uniform distribution. */
76a50749
UD
169 do
170 rnum = rand_r (&state);
171 while (rnum >= UINT_MAX - (UINT_MAX % 100));
172
173 rnum %= 100;
174
175 if (rnum < workload)
176 {
177 int j;
178 int a[4] = { i, rnum, i + rnum, rnum - i };
179
180 if (progress)
181 write (STDERR_FILENO, "c", 1);
182
183 for (j = 0; j < workcost; ++j)
184 {
185 a[0] += a[3] >> 12;
186 a[1] += a[2] >> 20;
187 a[2] += a[1] ^ 0x3423423;
188 a[3] += a[0] - a[1];
189 }
190
191 pthread_mutex_lock (&sum_mutex);
192 sum += a[0] + a[1] + a[2] + a[3];
193 pthread_mutex_unlock (&sum_mutex);
194 }
195 else
196 {
197 /* Just sleep. */
198 struct timespec tv;
199
200 tv.tv_sec = 0;
201 tv.tv_nsec = 10000000;
202
203 if (progress)
204 write (STDERR_FILENO, "w", 1);
205
206 nanosleep (&tv, NULL);
207 }
208 }
209
210 return NULL;
211}
212
213
214static void *
215thread_function (void *arg)
216{
217 work (arg);
218
219 pthread_mutex_lock (&running_mutex);
220 if (--running <= 0 && starts <= 0)
221 {
222 /* We are done. */
223 if (progress)
224 write (STDERR_FILENO, "\n", 1);
225
226 if (timing)
227 {
228 struct timespec end_time;
229
230 if (clock_gettime (cl, &end_time) == 0)
231 {
232 end_time.tv_sec -= start_time.tv_sec;
233 end_time.tv_nsec -= start_time.tv_nsec;
234 if (end_time.tv_nsec < 0)
235 {
236 end_time.tv_nsec += 1000000000;
237 --end_time.tv_sec;
238 }
239
240 printf ("\nRuntime: %lu.%09lu seconds\n",
241 (unsigned long int) end_time.tv_sec,
242 (unsigned long int) end_time.tv_nsec);
243 }
244 }
245
246 printf ("Result: %08x\n", sum);
247
248 exit (0);
249 }
250 pthread_mutex_unlock (&running_mutex);
251
252 if (sync_method == sync_signal)
253 {
254 if (to_thread)
255 /* This code sends a signal to the main thread. */
256 pthread_kill (tmain, SIGUSR1);
257 else
258 /* Use this code to test sending a signal to the process. */
259 kill (pid, SIGUSR1);
260 }
261
262 if (progress)
263 write (STDERR_FILENO, "f", 1);
264
265 return NULL;
266}
267
268
/* Parameters handed to one toplevel thread running start_threads.  */
struct start_info
{
  /* Number of working threads the toplevel thread creates in total.  */
  unsigned int starts;
  /* Number of thread descriptors the toplevel thread cycles through.  */
  unsigned int threads;
};
274
275
276static void *
277start_threads (void *arg)
278{
279 struct start_info *si = arg;
280 unsigned int starts = si->starts;
281 pthread_t ths[si->threads];
282 unsigned int state = starts;
283 unsigned int n;
284 unsigned int i = 0;
285 int err;
286
287 if (progress)
288 write (STDERR_FILENO, "T", 1);
289
290 memset (ths, '\0', sizeof (pthread_t) * si->threads);
291
292 while (starts-- > 0)
293 {
294 if (ths[i] != 0)
295 {
296 /* Wait for the threads in the order they were created. */
297 err = pthread_join (ths[i], NULL);
298 if (err != 0)
299 error (EXIT_FAILURE, err, "cannot join thread");
300
301 if (progress)
302 write (STDERR_FILENO, "f", 1);
303 }
304
305 err = pthread_create (&ths[i], &attr, work,
ff48874d 306 (void *) (long) (rand_r (&state) + starts + i));
76a50749
UD
307
308 if (err != 0)
309 error (EXIT_FAILURE, err, "cannot start thread");
310
311 if (progress)
312 write (STDERR_FILENO, "t", 1);
313
314 if (++i == si->threads)
315 i = 0;
316 }
317
318 n = i;
319 do
320 {
321 if (ths[i] != 0)
322 {
323 err = pthread_join (ths[i], NULL);
324 if (err != 0)
325 error (EXIT_FAILURE, err, "cannot join thread");
326
327 if (progress)
328 write (STDERR_FILENO, "f", 1);
329 }
330
331 if (++i == si->threads)
332 i = 0;
333 }
334 while (i != n);
335
336 if (progress)
337 write (STDERR_FILENO, "F", 1);
338
339 return NULL;
340}
341
342
343int
344main (int argc, char *argv[])
345{
346 int remaining;
347 sigset_t ss;
348 pthread_t th;
349 pthread_t *ths = NULL;
350 int empty = 0;
351 int last;
352 bool cont = true;
353
354 /* Parse and process arguments. */
355 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
356
357 if (sync_method == sync_join)
358 {
359 ths = (pthread_t *) calloc (threads, sizeof (pthread_t));
360 if (ths == NULL)
361 error (EXIT_FAILURE, errno,
362 "cannot allocate memory for thread descriptor array");
363
364 last = threads;
365 }
366 else
367 {
368 ths = &th;
369 last = 1;
370 }
371
372 if (toplevel > threads)
373 {
374 printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
375 threads);
376 toplevel = threads;
377 }
378
379 if (timing)
380 {
381 if (clock_getcpuclockid (0, &cl) != 0
382 || clock_gettime (cl, &start_time) != 0)
383 timing = false;
384 }
385
386 /* We need this later. */
387 pid = getpid ();
388 tmain = pthread_self ();
389
390 /* We use signal SIGUSR1 for communication between the threads and
391 the main thread. We only want sychronous notification. */
392 if (sync_method == sync_signal)
393 {
394 sigemptyset (&ss);
395 sigaddset (&ss, SIGUSR1);
396 if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
397 error (EXIT_FAILURE, errno, "cannot set signal mask");
398 }
399
400 /* Create the thread attributes. */
401 pthread_attr_init (&attr);
402
403 /* If the user provided a stack size use it. */
404 if (stacksize != 0
405 && pthread_attr_setstacksize (&attr, stacksize) != 0)
406 puts ("could not set stack size; will use default");
407 /* And stack guard size. */
408 if (guardsize != -1
409 && pthread_attr_setguardsize (&attr, guardsize) != 0)
410 puts ("invalid stack guard size; will use default");
411
412 /* All threads are created detached if we are not using pthread_join
413 to synchronize. */
414 if (sync_method != sync_join)
415 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
416
417 if (sync_method == sync_signal)
418 {
419 while (1)
420 {
421 int err;
422 bool do_wait = false;
423
424 pthread_mutex_lock (&running_mutex);
425 if (starts-- < 0)
426 cont = false;
427 else
428 do_wait = ++running >= threads && starts > 0;
429
430 pthread_mutex_unlock (&running_mutex);
431
432 if (! cont)
433 break;
434
435 if (progress)
436 write (STDERR_FILENO, "t", 1);
437
438 err = pthread_create (&ths[empty], &attr, thread_function,
439 (void *) starts);
440 if (err != 0)
441 error (EXIT_FAILURE, err, "cannot start thread %lu", starts);
442
443 if (++empty == last)
444 empty = 0;
445
446 if (do_wait)
447 sigwaitinfo (&ss, NULL);
448 }
449
450 /* Do nothing anymore. On of the threads will terminate the program. */
451 sigfillset (&ss);
452 sigdelset (&ss, SIGINT);
453 while (1)
454 sigsuspend (&ss);
455 }
456 else
457 {
458 pthread_t ths[toplevel];
459 struct start_info si[toplevel];
460 unsigned int i;
461
462 for (i = 0; i < toplevel; ++i)
463 {
464 unsigned int child_starts = starts / (toplevel - i);
465 unsigned int child_threads = threads / (toplevel - i);
466 int err;
467
468 si[i].starts = child_starts;
469 si[i].threads = child_threads;
470
471 err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
472 if (err != 0)
473 error (EXIT_FAILURE, err, "cannot start thread");
474
475 starts -= child_starts;
476 threads -= child_threads;
477 }
478
479 for (i = 0; i < toplevel; ++i)
480 {
481 int err = pthread_join (ths[i], NULL);
482
483 if (err != 0)
484 error (EXIT_FAILURE, err, "cannot join thread");
485 }
486
487 /* We are done. */
488 if (progress)
489 write (STDERR_FILENO, "\n", 1);
490
491 if (timing)
492 {
493 struct timespec end_time;
494
495 if (clock_gettime (cl, &end_time) == 0)
496 {
497 end_time.tv_sec -= start_time.tv_sec;
498 end_time.tv_nsec -= start_time.tv_nsec;
499 if (end_time.tv_nsec < 0)
500 {
501 end_time.tv_nsec += 1000000000;
502 --end_time.tv_sec;
503 }
504
505 printf ("\nRuntime: %lu.%09lu seconds\n",
506 (unsigned long int) end_time.tv_sec,
507 (unsigned long int) end_time.tv_nsec);
508 }
509 }
510
511 printf ("Result: %08x\n", sum);
512
513 exit (0);
514 }
515
516 /* NOTREACHED */
517 return 0;
518}
519
520
521/* Handle program arguments. */
522static error_t
523parse_opt (int key, char *arg, struct argp_state *state)
524{
525 unsigned long int num;
526 long int snum;
527
528 switch (key)
529 {
530 case 't':
531 num = strtoul (arg, NULL, 0);
efe0cd0f 532 if (num <= MAX_THREADS)
76a50749
UD
533 threads = num;
534 else
535 printf ("\
536number of threads limited to %u; recompile with a higher limit if necessary",
537 MAX_THREADS);
538 break;
539
540 case 'w':
541 num = strtoul (arg, NULL, 0);
542 if (num <= 100)
543 workload = num;
544 else
545 puts ("workload must be between 0 and 100 percent");
546 break;
547
548 case 'c':
549 workcost = strtoul (arg, NULL, 0);
550 break;
551
552 case 'r':
553 rounds = strtoul (arg, NULL, 0);
554 break;
555
556 case 's':
557 starts = strtoul (arg, NULL, 0);
558 break;
559
560 case 'S':
561 num = strtoul (arg, NULL, 0);
562 if (num >= PTHREAD_STACK_MIN)
563 stacksize = num;
564 else
565 printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN);
566 break;
567
568 case 'g':
569 snum = strtol (arg, NULL, 0);
570 if (snum < 0)
571 printf ("invalid guard size %s\n", arg);
572 else
573 guardsize = snum;
574 break;
575
576 case 'p':
577 progress = true;
578 break;
579
580 case 'T':
581 timing = true;
582 break;
583
584 case OPT_TO_THREAD:
585 to_thread = true;
586 break;
587
588 case OPT_TO_PROCESS:
589 to_thread = false;
590 break;
591
592 case OPT_SYNC_SIGNAL:
593 sync_method = sync_signal;
594 break;
595
596 case OPT_SYNC_JOIN:
597 sync_method = sync_join;
598 break;
599
600 case OPT_TOPLEVEL:
601 num = strtoul (arg, NULL, 0);
602 if (num < MAX_THREADS)
603 toplevel = num;
604 else
605 printf ("\
606number of threads limited to %u; recompile with a higher limit if necessary",
607 MAX_THREADS);
608 sync_method = sync_join;
609 break;
610
611 default:
612 return ARGP_ERR_UNKNOWN;
613 }
614
615 return 0;
616}
617
618
619static hp_timing_t
620get_clockfreq (void)
621{
622 /* We read the information from the /proc filesystem. It contains at
623 least one line like
624 cpu MHz : 497.840237
625 or also
626 cpu MHz : 497.841
627 We search for this line and convert the number in an integer. */
628 static hp_timing_t result;
629 int fd;
630
631 /* If this function was called before, we know the result. */
632 if (result != 0)
633 return result;
634
635 fd = open ("/proc/cpuinfo", O_RDONLY);
a1ffb40e 636 if (__glibc_likely (fd != -1))
76a50749
UD
637 {
638 /* XXX AFAIK the /proc filesystem can generate "files" only up
639 to a size of 4096 bytes. */
640 char buf[4096];
641 ssize_t n;
642
643 n = read (fd, buf, sizeof buf);
644 if (__builtin_expect (n, 1) > 0)
645 {
646 char *mhz = memmem (buf, n, "cpu MHz", 7);
647
a1ffb40e 648 if (__glibc_likely (mhz != NULL))
76a50749
UD
649 {
650 char *endp = buf + n;
651 int seen_decpoint = 0;
652 int ndigits = 0;
653
654 /* Search for the beginning of the string. */
655 while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
656 ++mhz;
657
658 while (mhz < endp && *mhz != '\n')
659 {
660 if (*mhz >= '0' && *mhz <= '9')
661 {
662 result *= 10;
663 result += *mhz - '0';
664 if (seen_decpoint)
665 ++ndigits;
666 }
667 else if (*mhz == '.')
668 seen_decpoint = 1;
669
670 ++mhz;
671 }
672
673 /* Compensate for missing digits at the end. */
674 while (ndigits++ < 6)
675 result *= 10;
676 }
677 }
678
679 close (fd);
680 }
681
682 return result;
683}
684
685
/* Store in *CLOCK_ID the ID of the CPU-time clock of the process PID.
   Only the calling process is supported, named either by zero or by
   its own process ID; for every other PID the result is EPERM.  Returns
   zero on success and ENOENT if no such clock is defined.  */
int
clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
{
  int is_self = pid == 0 || pid == getpid ();

  if (! is_self)
    return EPERM;

#ifndef CLOCK_PROCESS_CPUTIME_ID
  /* There is no clock we could hand out.  */
  return ENOENT;
#else
  *clock_id = CLOCK_PROCESS_CPUTIME_ID;

  return 0;
#endif
}
703
704
/* HP_TIMING_NOW (Var) stores the current value of the CPU's counter
   register in Var, which should be a 64-bit integer variable.  Only the
   reserved spellings __i386__ and __asm__ are relied upon since the
   plain forms `i386' and `asm' are not available when compiling in a
   strict ISO C mode.  */
#if defined __i386__ || defined i386
# define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
#elif defined __x86_64__
/* rdtsc delivers the value in two 32-bit halves.  */
# define HP_TIMING_NOW(Var) \
  ({ unsigned int _hi, _lo;                                             \
     __asm__ __volatile__ ("rdtsc" : "=a" (_lo), "=d" (_hi));           \
     (Var) = ((unsigned long long int) _hi << 32) | _lo; })
#elif defined __ia64__
# define HP_TIMING_NOW(Var) \
  __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
#else
# error "HP_TIMING_NOW missing"
#endif
76a50749
UD
717
718/* Get current value of CLOCK and store it in TP. */
719int
720clock_gettime (clockid_t clock_id, struct timespec *tp)
721{
722 int retval = -1;
723
724 switch (clock_id)
725 {
726 case CLOCK_PROCESS_CPUTIME_ID:
727 {
728
729 static hp_timing_t freq;
730 hp_timing_t tsc;
731
732 /* Get the current counter. */
733 HP_TIMING_NOW (tsc);
734
735 if (freq == 0)
736 {
737 freq = get_clockfreq ();
738 if (freq == 0)
739 return EINVAL;
740 }
741
742 /* Compute the seconds. */
743 tp->tv_sec = tsc / freq;
744
745 /* And the nanoseconds. This computation should be stable until
746 we get machines with about 16GHz frequency. */
747 tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;
748
749 retval = 0;
750 }
751 break;
752
753 default:
754 errno = EINVAL;
755 break;
756 }
757
758 return retval;
759}