]> git.ipfire.org Git - thirdparty/glibc.git/blob - nptl/perf.c
(CFLAGS-tst-align.c): Add -mpreferred-stack-boundary=4.
[thirdparty/glibc.git] / nptl / perf.c
1 /* Copyright (C) 2002 Free Software Foundation, Inc.
2 This file is part of the GNU C Library.
3 Contributed by Ulrich Drepper <drepper@redhat.com>, 2002.
4
5 The GNU C Library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Lesser General Public
7 License as published by the Free Software Foundation; either
8 version 2.1 of the License, or (at your option) any later version.
9
10 The GNU C Library is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 Lesser General Public License for more details.
14
15 You should have received a copy of the GNU Lesser General Public
16 License along with the GNU C Library; if not, write to the Free
17 Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307 USA. */
19
20 #define _GNU_SOURCE 1
21 #include <argp.h>
22 #include <error.h>
23 #include <errno.h>
24 #include <fcntl.h>
25 #include <inttypes.h>
26 #include <limits.h>
27 #include <pthread.h>
28 #include <signal.h>
29 #include <stdbool.h>
30 #include <stdlib.h>
31 #include <string.h>
32 #include <time.h>
33 #include <unistd.h>
34 #include <sys/param.h>
35 #include <sys/types.h>
36
/* Largest value accepted by the thread-count options.  May be
   overridden when compiling.  */
#ifndef MAX_THREADS
# define MAX_THREADS 100000
#endif
/* Initial number of threads used at once when --threads is not given.
   May be overridden when compiling.  */
#ifndef DEFAULT_THREADS
# define DEFAULT_THREADS 50
#endif


/* Keys of the options which have no single-letter form in the option
   table.  */
#define OPT_TO_THREAD 300
#define OPT_TO_PROCESS 301
#define OPT_SYNC_SIGNAL 302
#define OPT_SYNC_JOIN 303
#define OPT_TOPLEVEL 304
50
51
52 static const struct argp_option options[] =
53 {
54 { NULL, 0, NULL, 0, "\
55 This is a test for threads so we allow ther user to selection the number of \
56 threads which are used at any one time. Independently the total number of \
57 rounds can be selected. This is the total number of threads which will have \
58 run when the process terminates:" },
59 { "threads", 't', "NUMBER", 0, "Number of threads used at once" },
60 { "starts", 's', "NUMBER", 0, "Total number of working threads" },
61 { "toplevel", OPT_TOPLEVEL, "NUMBER", 0,
62 "Number of toplevel threads which start the other threads; this \
63 implies --sync-join" },
64
65 { NULL, 0, NULL, 0, "\
66 Each thread can do one of two things: sleep or do work. The latter is 100% \
67 CPU bound. The work load is the probability a thread does work. All values \
68 from zero to 100 (inclusive) are valid. How often each thread repeats this \
69 can be determined by the number of rounds. The work cost determines how long \
70 each work session (not sleeping) takes. If it is zero a thread would \
71 effectively nothing. By setting the number of rounds to zero the thread \
72 does no work at all and pure thread creation times can be measured." },
73 { "workload", 'w', "PERCENT", 0, "Percentage of time spent working" },
74 { "workcost", 'c', "NUMBER", 0,
75 "Factor in the cost of each round of working" },
76 { "rounds", 'r', "NUMBER", 0, "Number of rounds each thread runs" },
77
78 { NULL, 0, NULL, 0, "\
79 There are a number of different methods how thread creation can be \
80 synchronized. Synchronization is necessary since the number of concurrently \
81 running threads is limited." },
82 { "sync-signal", OPT_SYNC_SIGNAL, NULL, 0,
83 "Synchronize using a signal (default)" },
84 { "sync-join", OPT_SYNC_JOIN, NULL, 0, "Synchronize using pthread_join" },
85
86 { NULL, 0, NULL, 0, "\
87 One parameter for each threads execution is the size of the stack. If this \
88 parameter is not used the system's default stack size is used. If many \
89 threads are used the stack size should be chosen quite small." },
90 { "stacksize", 'S', "BYTES", 0, "Size of threads stack" },
91 { "guardsize", 'g', "BYTES", 0,
92 "Size of stack guard area; must fit into the stack" },
93
94 { NULL, 0, NULL, 0, "Signal options:" },
95 { "to-thread", OPT_TO_THREAD, NULL, 0, "Send signal to main thread" },
96 { "to-process", OPT_TO_PROCESS, NULL, 0,
97 "Send signal to process (default)" },
98
99 { NULL, 0, NULL, 0, "Administrative options:" },
100 { "progress", 'p', NULL, 0, "Show signs of progress" },
101 { "timing", 'T', NULL, 0,
102 "Measure time from startup to the last thread finishing" },
103 { NULL, 0, NULL, 0, NULL }
104 };
105
106 /* Prototype for option handler. */
107 static error_t parse_opt (int key, char *arg, struct argp_state *state);
108
109 /* Data structure to communicate with argp functions. */
110 static struct argp argp =
111 {
112 options, parse_opt
113 };
114
115
116 static unsigned long int threads = DEFAULT_THREADS;
117 static unsigned long int workload = 75;
118 static unsigned long int workcost = 20;
119 static unsigned long int rounds = 10;
120 static long int starts = 5000;
121 static unsigned long int stacksize;
122 static long int guardsize = -1;
123 static bool progress;
124 static bool timing;
125 static bool to_thread;
126 static unsigned long int toplevel = 1;
127
128
129 static long int running;
130 static pthread_mutex_t running_mutex = PTHREAD_MUTEX_INITIALIZER;
131
132 static pid_t pid;
133 static pthread_t tmain;
134
135 static clockid_t cl;
136 static struct timespec start_time;
137
138
139 static pthread_mutex_t sum_mutex = PTHREAD_MUTEX_INITIALIZER;
140 unsigned int sum;
141
142 static enum
143 {
144 sync_signal,
145 sync_join
146 }
147 sync_method;
148
149
150 /* We use 64bit values for the times. */
151 typedef unsigned long long int hp_timing_t;
152
153
154 /* Attributes for all created threads. */
155 static pthread_attr_t attr;
156
157
158 static void *
159 work (void *arg)
160 {
161 unsigned long int i;
162 unsigned int state = (unsigned long int) arg;
163
164 for (i = 0; i < rounds; ++i)
165 {
166 /* Determine what to do. */
167 unsigned int rnum;
168
169 /* Uniform distribution. */
170 do
171 rnum = rand_r (&state);
172 while (rnum >= UINT_MAX - (UINT_MAX % 100));
173
174 rnum %= 100;
175
176 if (rnum < workload)
177 {
178 int j;
179 int a[4] = { i, rnum, i + rnum, rnum - i };
180
181 if (progress)
182 write (STDERR_FILENO, "c", 1);
183
184 for (j = 0; j < workcost; ++j)
185 {
186 a[0] += a[3] >> 12;
187 a[1] += a[2] >> 20;
188 a[2] += a[1] ^ 0x3423423;
189 a[3] += a[0] - a[1];
190 }
191
192 pthread_mutex_lock (&sum_mutex);
193 sum += a[0] + a[1] + a[2] + a[3];
194 pthread_mutex_unlock (&sum_mutex);
195 }
196 else
197 {
198 /* Just sleep. */
199 struct timespec tv;
200
201 tv.tv_sec = 0;
202 tv.tv_nsec = 10000000;
203
204 if (progress)
205 write (STDERR_FILENO, "w", 1);
206
207 nanosleep (&tv, NULL);
208 }
209 }
210
211 return NULL;
212 }
213
214
215 static void *
216 thread_function (void *arg)
217 {
218 work (arg);
219
220 pthread_mutex_lock (&running_mutex);
221 if (--running <= 0 && starts <= 0)
222 {
223 /* We are done. */
224 if (progress)
225 write (STDERR_FILENO, "\n", 1);
226
227 if (timing)
228 {
229 struct timespec end_time;
230
231 if (clock_gettime (cl, &end_time) == 0)
232 {
233 end_time.tv_sec -= start_time.tv_sec;
234 end_time.tv_nsec -= start_time.tv_nsec;
235 if (end_time.tv_nsec < 0)
236 {
237 end_time.tv_nsec += 1000000000;
238 --end_time.tv_sec;
239 }
240
241 printf ("\nRuntime: %lu.%09lu seconds\n",
242 (unsigned long int) end_time.tv_sec,
243 (unsigned long int) end_time.tv_nsec);
244 }
245 }
246
247 printf ("Result: %08x\n", sum);
248
249 exit (0);
250 }
251 pthread_mutex_unlock (&running_mutex);
252
253 if (sync_method == sync_signal)
254 {
255 if (to_thread)
256 /* This code sends a signal to the main thread. */
257 pthread_kill (tmain, SIGUSR1);
258 else
259 /* Use this code to test sending a signal to the process. */
260 kill (pid, SIGUSR1);
261 }
262
263 if (progress)
264 write (STDERR_FILENO, "f", 1);
265
266 return NULL;
267 }
268
269
/* Job description handed to one toplevel thread running
   start_threads.  */
struct start_info
{
  unsigned starts;      /* Number of workers this thread has to create.  */
  unsigned threads;     /* Number of workers it keeps alive at once.  */
};
275
276
277 static void *
278 start_threads (void *arg)
279 {
280 struct start_info *si = arg;
281 unsigned int starts = si->starts;
282 pthread_t ths[si->threads];
283 unsigned int state = starts;
284 unsigned int n;
285 unsigned int i = 0;
286 int err;
287
288 if (progress)
289 write (STDERR_FILENO, "T", 1);
290
291 memset (ths, '\0', sizeof (pthread_t) * si->threads);
292
293 while (starts-- > 0)
294 {
295 if (ths[i] != 0)
296 {
297 /* Wait for the threads in the order they were created. */
298 err = pthread_join (ths[i], NULL);
299 if (err != 0)
300 error (EXIT_FAILURE, err, "cannot join thread");
301
302 if (progress)
303 write (STDERR_FILENO, "f", 1);
304 }
305
306 err = pthread_create (&ths[i], &attr, work,
307 (void *) (long) (rand_r (&state) + starts + i));
308
309 if (err != 0)
310 error (EXIT_FAILURE, err, "cannot start thread");
311
312 if (progress)
313 write (STDERR_FILENO, "t", 1);
314
315 if (++i == si->threads)
316 i = 0;
317 }
318
319 n = i;
320 do
321 {
322 if (ths[i] != 0)
323 {
324 err = pthread_join (ths[i], NULL);
325 if (err != 0)
326 error (EXIT_FAILURE, err, "cannot join thread");
327
328 if (progress)
329 write (STDERR_FILENO, "f", 1);
330 }
331
332 if (++i == si->threads)
333 i = 0;
334 }
335 while (i != n);
336
337 if (progress)
338 write (STDERR_FILENO, "F", 1);
339
340 return NULL;
341 }
342
343
344 int
345 main (int argc, char *argv[])
346 {
347 int remaining;
348 sigset_t ss;
349 pthread_t th;
350 pthread_t *ths = NULL;
351 int empty = 0;
352 int last;
353 bool cont = true;
354
355 /* Parse and process arguments. */
356 argp_parse (&argp, argc, argv, 0, &remaining, NULL);
357
358 if (sync_method == sync_join)
359 {
360 ths = (pthread_t *) calloc (threads, sizeof (pthread_t));
361 if (ths == NULL)
362 error (EXIT_FAILURE, errno,
363 "cannot allocate memory for thread descriptor array");
364
365 last = threads;
366 }
367 else
368 {
369 ths = &th;
370 last = 1;
371 }
372
373 if (toplevel > threads)
374 {
375 printf ("resetting number of toplevel threads to %lu to not surpass number to concurrent threads\n",
376 threads);
377 toplevel = threads;
378 }
379
380 if (timing)
381 {
382 if (clock_getcpuclockid (0, &cl) != 0
383 || clock_gettime (cl, &start_time) != 0)
384 timing = false;
385 }
386
387 /* We need this later. */
388 pid = getpid ();
389 tmain = pthread_self ();
390
391 /* We use signal SIGUSR1 for communication between the threads and
392 the main thread. We only want sychronous notification. */
393 if (sync_method == sync_signal)
394 {
395 sigemptyset (&ss);
396 sigaddset (&ss, SIGUSR1);
397 if (sigprocmask (SIG_BLOCK, &ss, NULL) != 0)
398 error (EXIT_FAILURE, errno, "cannot set signal mask");
399 }
400
401 /* Create the thread attributes. */
402 pthread_attr_init (&attr);
403
404 /* If the user provided a stack size use it. */
405 if (stacksize != 0
406 && pthread_attr_setstacksize (&attr, stacksize) != 0)
407 puts ("could not set stack size; will use default");
408 /* And stack guard size. */
409 if (guardsize != -1
410 && pthread_attr_setguardsize (&attr, guardsize) != 0)
411 puts ("invalid stack guard size; will use default");
412
413 /* All threads are created detached if we are not using pthread_join
414 to synchronize. */
415 if (sync_method != sync_join)
416 pthread_attr_setdetachstate (&attr, PTHREAD_CREATE_DETACHED);
417
418 if (sync_method == sync_signal)
419 {
420 while (1)
421 {
422 int err;
423 bool do_wait = false;
424
425 pthread_mutex_lock (&running_mutex);
426 if (starts-- < 0)
427 cont = false;
428 else
429 do_wait = ++running >= threads && starts > 0;
430
431 pthread_mutex_unlock (&running_mutex);
432
433 if (! cont)
434 break;
435
436 if (progress)
437 write (STDERR_FILENO, "t", 1);
438
439 err = pthread_create (&ths[empty], &attr, thread_function,
440 (void *) starts);
441 if (err != 0)
442 error (EXIT_FAILURE, err, "cannot start thread %lu", starts);
443
444 if (++empty == last)
445 empty = 0;
446
447 if (do_wait)
448 sigwaitinfo (&ss, NULL);
449 }
450
451 /* Do nothing anymore. On of the threads will terminate the program. */
452 sigfillset (&ss);
453 sigdelset (&ss, SIGINT);
454 while (1)
455 sigsuspend (&ss);
456 }
457 else
458 {
459 pthread_t ths[toplevel];
460 struct start_info si[toplevel];
461 unsigned int i;
462
463 for (i = 0; i < toplevel; ++i)
464 {
465 unsigned int child_starts = starts / (toplevel - i);
466 unsigned int child_threads = threads / (toplevel - i);
467 int err;
468
469 si[i].starts = child_starts;
470 si[i].threads = child_threads;
471
472 err = pthread_create (&ths[i], &attr, start_threads, &si[i]);
473 if (err != 0)
474 error (EXIT_FAILURE, err, "cannot start thread");
475
476 starts -= child_starts;
477 threads -= child_threads;
478 }
479
480 for (i = 0; i < toplevel; ++i)
481 {
482 int err = pthread_join (ths[i], NULL);
483
484 if (err != 0)
485 error (EXIT_FAILURE, err, "cannot join thread");
486 }
487
488 /* We are done. */
489 if (progress)
490 write (STDERR_FILENO, "\n", 1);
491
492 if (timing)
493 {
494 struct timespec end_time;
495
496 if (clock_gettime (cl, &end_time) == 0)
497 {
498 end_time.tv_sec -= start_time.tv_sec;
499 end_time.tv_nsec -= start_time.tv_nsec;
500 if (end_time.tv_nsec < 0)
501 {
502 end_time.tv_nsec += 1000000000;
503 --end_time.tv_sec;
504 }
505
506 printf ("\nRuntime: %lu.%09lu seconds\n",
507 (unsigned long int) end_time.tv_sec,
508 (unsigned long int) end_time.tv_nsec);
509 }
510 }
511
512 printf ("Result: %08x\n", sum);
513
514 exit (0);
515 }
516
517 /* NOTREACHED */
518 return 0;
519 }
520
521
522 /* Handle program arguments. */
523 static error_t
524 parse_opt (int key, char *arg, struct argp_state *state)
525 {
526 unsigned long int num;
527 long int snum;
528
529 switch (key)
530 {
531 case 't':
532 num = strtoul (arg, NULL, 0);
533 if (num <= MAX_THREADS)
534 threads = num;
535 else
536 printf ("\
537 number of threads limited to %u; recompile with a higher limit if necessary",
538 MAX_THREADS);
539 break;
540
541 case 'w':
542 num = strtoul (arg, NULL, 0);
543 if (num <= 100)
544 workload = num;
545 else
546 puts ("workload must be between 0 and 100 percent");
547 break;
548
549 case 'c':
550 workcost = strtoul (arg, NULL, 0);
551 break;
552
553 case 'r':
554 rounds = strtoul (arg, NULL, 0);
555 break;
556
557 case 's':
558 starts = strtoul (arg, NULL, 0);
559 break;
560
561 case 'S':
562 num = strtoul (arg, NULL, 0);
563 if (num >= PTHREAD_STACK_MIN)
564 stacksize = num;
565 else
566 printf ("minimum stack size is %d\n", PTHREAD_STACK_MIN);
567 break;
568
569 case 'g':
570 snum = strtol (arg, NULL, 0);
571 if (snum < 0)
572 printf ("invalid guard size %s\n", arg);
573 else
574 guardsize = snum;
575 break;
576
577 case 'p':
578 progress = true;
579 break;
580
581 case 'T':
582 timing = true;
583 break;
584
585 case OPT_TO_THREAD:
586 to_thread = true;
587 break;
588
589 case OPT_TO_PROCESS:
590 to_thread = false;
591 break;
592
593 case OPT_SYNC_SIGNAL:
594 sync_method = sync_signal;
595 break;
596
597 case OPT_SYNC_JOIN:
598 sync_method = sync_join;
599 break;
600
601 case OPT_TOPLEVEL:
602 num = strtoul (arg, NULL, 0);
603 if (num < MAX_THREADS)
604 toplevel = num;
605 else
606 printf ("\
607 number of threads limited to %u; recompile with a higher limit if necessary",
608 MAX_THREADS);
609 sync_method = sync_join;
610 break;
611
612 default:
613 return ARGP_ERR_UNKNOWN;
614 }
615
616 return 0;
617 }
618
619
620 static hp_timing_t
621 get_clockfreq (void)
622 {
623 /* We read the information from the /proc filesystem. It contains at
624 least one line like
625 cpu MHz : 497.840237
626 or also
627 cpu MHz : 497.841
628 We search for this line and convert the number in an integer. */
629 static hp_timing_t result;
630 int fd;
631
632 /* If this function was called before, we know the result. */
633 if (result != 0)
634 return result;
635
636 fd = open ("/proc/cpuinfo", O_RDONLY);
637 if (__builtin_expect (fd != -1, 1))
638 {
639 /* XXX AFAIK the /proc filesystem can generate "files" only up
640 to a size of 4096 bytes. */
641 char buf[4096];
642 ssize_t n;
643
644 n = read (fd, buf, sizeof buf);
645 if (__builtin_expect (n, 1) > 0)
646 {
647 char *mhz = memmem (buf, n, "cpu MHz", 7);
648
649 if (__builtin_expect (mhz != NULL, 1))
650 {
651 char *endp = buf + n;
652 int seen_decpoint = 0;
653 int ndigits = 0;
654
655 /* Search for the beginning of the string. */
656 while (mhz < endp && (*mhz < '0' || *mhz > '9') && *mhz != '\n')
657 ++mhz;
658
659 while (mhz < endp && *mhz != '\n')
660 {
661 if (*mhz >= '0' && *mhz <= '9')
662 {
663 result *= 10;
664 result += *mhz - '0';
665 if (seen_decpoint)
666 ++ndigits;
667 }
668 else if (*mhz == '.')
669 seen_decpoint = 1;
670
671 ++mhz;
672 }
673
674 /* Compensate for missing digits at the end. */
675 while (ndigits++ < 6)
676 result *= 10;
677 }
678 }
679
680 close (fd);
681 }
682
683 return result;
684 }
685
686
/* Store in *CLOCK_ID the identifier of the CPU-time clock of process
   PID.  Only the calling process, given either as 0 or by its own ID,
   is supported; any other PID yields EPERM.  Returns 0 on success and
   ENOENT if no such clock is defined on this system.  */
int
clock_getcpuclockid (pid_t pid, clockid_t *clock_id)
{
  const int is_self = pid == 0 || pid == getpid ();

  /* Clocks of other processes are not available.  */
  if (! is_self)
    return EPERM;

#ifndef CLOCK_PROCESS_CPUTIME_ID
  /* There is no clock which could be reported.  */
  return ENOENT;
#else
  /* Hand out the identifier.  */
  *clock_id = CLOCK_PROCESS_CPUTIME_ID;

  return 0;
#endif
}
704
705
/* HP_TIMING_NOW (Var) stores the current value of the processor's
   cycle counter in the 64-bit variable Var.  */
#if defined __i386__ || defined i386
/* The "A" constraint names the edx:eax pair which rdtsc fills.  */
# define HP_TIMING_NOW(Var) __asm__ __volatile__ ("rdtsc" : "=A" (Var))
#elif defined __x86_64__
/* On x86-64 "A" does not describe a register pair for a 64-bit
   operand, so both halves are read separately and combined.  */
# define HP_TIMING_NOW(Var) \
  do                                                                    \
    {                                                                   \
      unsigned int hp_lo_, hp_hi_;                                      \
      __asm__ __volatile__ ("rdtsc" : "=a" (hp_lo_), "=d" (hp_hi_));    \
      (Var) = ((unsigned long long int) hp_hi_ << 32) | hp_lo_;         \
    }                                                                   \
  while (0)
#elif defined __ia64__
# define HP_TIMING_NOW(Var) __asm__ __volatile__ ("mov %0=ar.itc" : "=r" (Var) : : "memory")
#else
# error "HP_TIMING_NOW missing"
#endif
713
714 /* Get current value of CLOCK and store it in TP. */
715 int
716 clock_gettime (clockid_t clock_id, struct timespec *tp)
717 {
718 int retval = -1;
719
720 switch (clock_id)
721 {
722 case CLOCK_PROCESS_CPUTIME_ID:
723 {
724
725 static hp_timing_t freq;
726 hp_timing_t tsc;
727
728 /* Get the current counter. */
729 HP_TIMING_NOW (tsc);
730
731 if (freq == 0)
732 {
733 freq = get_clockfreq ();
734 if (freq == 0)
735 return EINVAL;
736 }
737
738 /* Compute the seconds. */
739 tp->tv_sec = tsc / freq;
740
741 /* And the nanoseconds. This computation should be stable until
742 we get machines with about 16GHz frequency. */
743 tp->tv_nsec = ((tsc % freq) * UINT64_C (1000000000)) / freq;
744
745 retval = 0;
746 }
747 break;
748
749 default:
750 errno = EINVAL;
751 break;
752 }
753
754 return retval;
755 }