]>
Commit | Line | Data |
---|---|---|
13d83320 | 1 | /* High precision, low overhead timing functions. powerpc64 version. |
d4697bc9 | 2 | Copyright (C) 2005-2014 Free Software Foundation, Inc. |
13d83320 UD |
3 | This file is part of the GNU C Library. |
4 | Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998. | |
5 | ||
6 | The GNU C Library is free software; you can redistribute it and/or | |
7 | modify it under the terms of the GNU Lesser General Public | |
8 | License as published by the Free Software Foundation; either | |
9 | version 2.1 of the License, or (at your option) any later version. | |
10 | ||
11 | The GNU C Library is distributed in the hope that it will be useful, | |
12 | but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
14 | Lesser General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU Lesser General Public | |
59ba27a6 PE |
17 | License along with the GNU C Library; if not, see |
18 | <http://www.gnu.org/licenses/>. */ | |
13d83320 UD |
19 | |
20 | #ifndef _HP_TIMING_H | |
21 | #define _HP_TIMING_H 1 | |
22 | ||
23 | #include <string.h> | |
24 | #include <sys/param.h> | |
eb96ffb0 | 25 | #include <_itoa.h> |
13d83320 UD |
26 | #include <atomic.h> |
27 | ||
28 | /* The macros defined here use the powerpc 64-bit time base register. | |
29 | The time base is nominally clocked at 1/8th the CPU clock, but this | |
30 | can vary. | |
31 | ||
32 | The list of macros we need includes the following: | |
33 | ||
34 | - HP_TIMING_AVAIL: test for availability. | |
35 | ||
36 | - HP_TIMING_INLINE: this macro is non-zero if the functionality is not | |
37 | implemented using function calls but instead uses some inlined code | |
38 | which might simply consist of a few assembler instructions. We have to | |
39 | know this since we might want to use the macros here in places where we | |
40 | cannot make function calls. | |
41 | ||
42 | - hp_timing_t: This is the type for variables used to store the time | |
43 | values. | |
44 | ||
45 | - HP_TIMING_ZERO: clear `hp_timing_t' object. | |
46 | ||
47 | - HP_TIMING_NOW: place timestamp for current time in variable given as | |
48 | parameter. | |
49 | ||
50 | - HP_TIMING_DIFF_INIT: do whatever is necessary to be able to use the | |
51 | HP_TIMING_DIFF macro. | |
52 | ||
53 | - HP_TIMING_DIFF: compute difference between two times and store it | |
54 | in a third. Source and destination might overlap. | |
55 | ||
56 | - HP_TIMING_ACCUM: add time difference to another variable. This might | |
57 | be a bit more complicated to implement for some platforms as the | |
58 | operation should be thread-safe and 64bit arithmetic on 32bit platforms | |
59 | is not. | |
60 | ||
61 | - HP_TIMING_ACCUM_NT: this is the variant for situations where we know | |
62 | there are no threads involved. | |
63 | ||
64 | - HP_TIMING_PRINT: write decimal representation of the timing value into | |
65 | the given string. This operation need not be inline even though | |
66 | HP_TIMING_INLINE is specified. | |
67 | ||
68 | */ | |
69 | ||
/* The time base register is assumed to exist on every supported CPU,
   so high-precision timing is unconditionally available.  */
#define HP_TIMING_AVAIL (1)

/* All timing operations in this file expand to inline code rather
   than function calls.  */
#define HP_TIMING_INLINE (1)

/* Timestamps and timestamp differences are kept as 64-bit unsigned
   values.  */
typedef unsigned long long hp_timing_t;
78 | ||
/* Reset the timestamp object Var to zero.  The expansion is a single
   parenthesised assignment expression, so it keeps its meaning when
   it appears inside a larger expression.  */
#define HP_TIMING_ZERO(Var) ((Var) = (0))
81 | ||
/* Store the current 64-bit time base value in Var.

   The asm reads SPR 269 (used as the upper 32 bits), then SPR 268
   (used as the lower 32 bits), then SPR 269 a second time, and repeats
   the whole sequence until both reads of SPR 269 agree.  That way the
   two halves combined below belong to one consistent reading even if
   the lower half wrapped between the reads.

   The value might not be 100% accurate since other instructions may
   still be executing at this moment.  A barrier such as 'lwsync' right
   before the reads would change that, but exact cycle counts are not
   needed here so none is used.

   The scratch variables carry a `__' prefix, like the locals of the
   other macros in this file, so that an argument named `hi', `lo' or
   `tmp' is not captured by the expansion.  */
#define HP_TIMING_NOW(Var) \
  do { \
    unsigned int __hi, __lo, __tmp; \
    __asm__ __volatile__ ("1: mfspr %0,269;" \
                          " mfspr %1,268;" \
                          " mfspr %2,269;" \
                          " cmpw %0,%2;" \
                          " bne 1b;" \
                          : "=&r" (__hi), "=&r" (__lo), "=&r" (__tmp) \
                          : : "cr0"); \
    (Var) = ((hp_timing_t) __hi << 32) | __lo; \
  } while (0)
100 | ||
101 | ||
/* Calibrate the cost of taking a timestamp.

   When GLRO(dl_hp_timing_overhead) is still zero, take five pairs of
   back-to-back HP_TIMING_NOW readings and leave the smallest observed
   difference in GLRO(dl_hp_timing_overhead).  When it is already
   non-zero nothing is done.
   On current POWER4, POWER5, and 970 processors mftb takes ~10 cycles.  */
#define HP_TIMING_DIFF_INIT() \
  do { \
    if (GLRO(dl_hp_timing_overhead) == 0) \
      { \
        int __round; \
        /* Start from the largest value so any measurement is smaller.  */ \
        GLRO(dl_hp_timing_overhead) = ~0ull; \
        for (__round = 5; __round > 0; --__round) \
          { \
            hp_timing_t __before, __after, __delta; \
            HP_TIMING_NOW (__before); \
            HP_TIMING_NOW (__after); \
            __delta = __after - __before; \
            if (__delta < GLRO(dl_hp_timing_overhead)) \
              GLRO(dl_hp_timing_overhead) = __delta; \
          } \
      } \
  } while (0)
121 | ||
/* Store End - Start in Diff using plain 64-bit arithmetic.  Diff may
   be the same object as Start or End.  The expansion is parenthesised
   as a whole so it can be used inside a larger expression.  */
#define HP_TIMING_DIFF(Diff, Start, End) ((Diff) = (End) - (Start))
124 | ||
/* Add a time difference to Sum in a way that is safe with threads.
   GLRO(dl_hp_timing_overhead) is subtracted from Diff first, and the
   remainder is added to Sum through __arch_atomic_exchange_and_add_64
   from atomic.h to ensure the addition is atomic.  */
#define HP_TIMING_ACCUM(Sum, Diff) \
  do { \
    const hp_timing_t __net = (Diff) - GLRO(dl_hp_timing_overhead); \
    __arch_atomic_exchange_and_add_64 (&(Sum), __net); \
  } while (0)
132 | ||
/* Variant of the accumulation for code that runs without threads: a
   plain addition of Diff to Sum.  Unlike HP_TIMING_ACCUM, Diff is added
   as given, with no overhead subtracted.  The expansion is
   parenthesised as a whole so it can be used inside a larger
   expression.  */
#define HP_TIMING_ACCUM_NT(Sum, Diff) ((Sum) += (Diff))
135 | ||
/* Write the decimal representation of Val followed by " ticks" into
   the buffer Buf, which has room for Len bytes.

   _itoa is used the way the rest of this macro relies on: it fills the
   scratch array backwards from its end and returns a pointer to the
   first digit, without adding a terminator.

   At most Len bytes of Buf are ever written.  If the digits, the
   suffix and the terminating NUL all fit, the result is the complete
   string.  Otherwise the text is cut short and the last byte of Buf is
   set to NUL so the result is still a valid string.  Nothing is
   written when Len is zero.  Each argument is evaluated exactly
   once.  */
#define HP_TIMING_PRINT(Buf, Len, Val) \
  do { \
    char __digits[20]; \
    char *__first = _itoa ((Val), __digits + sizeof (__digits), 10, 0); \
    char *__out = (Buf); \
    const size_t __size = (Len); \
    if (__size > 0) \
      { \
        /* Never copy more digits than the buffer can hold.  */ \
        size_t __ndigits \
          = MIN ((size_t) (__digits + sizeof (__digits) - __first), __size); \
        /* sizeof (" ticks") includes the terminating NUL.  */ \
        size_t __nsuffix = MIN (__size - __ndigits, sizeof (" ticks")); \
        memcpy (__out, __first, __ndigits); \
        memcpy (__out + __ndigits, " ticks", __nsuffix); \
        /* The suffix's own NUL was cut off: terminate by hand.  */ \
        if (__nsuffix < sizeof (" ticks")) \
          __out[__size - 1] = '\0'; \
      } \
  } while (0)
147 | ||
148 | #endif /* hp-timing.h */ |