]>
Commit | Line | Data |
---|---|---|
88bfe092 | 1 | /* |
262a0e14 | 2 | * $Id$ |
88bfe092 | 3 | * |
507d0a78 | 4 | * DEBUG: section 81 CPU Profiling Routines |
88bfe092 | 5 | * AUTHOR: Andres Kroonmaa, Sep.2000 |
6 | * | |
7 | * SQUID Internet Object Cache http://squid.nlanr.net/Squid/ | |
8 | * ---------------------------------------------------------- | |
9 | * | |
10 | * Squid is the result of efforts by numerous individuals from the | |
11 | * Internet community. Development is led by Duane Wessels of the | |
12 | * National Laboratory for Applied Network Research and funded by the | |
13 | * National Science Foundation. Squid is Copyrighted (C) 1998 by | |
14 | * the Regents of the University of California. Please see the | |
15 | * COPYRIGHT file for full details. Squid incorporates software | |
16 | * developed and/or copyrighted by other sources. Please see the | |
17 | * CREDITS file for full details. | |
18 | * | |
19 | * This program is free software; you can redistribute it and/or modify | |
20 | * it under the terms of the GNU General Public License as published by | |
21 | * the Free Software Foundation; either version 2 of the License, or | |
22 | * (at your option) any later version. | |
26ac0430 | 23 | * |
88bfe092 | 24 | * This program is distributed in the hope that it will be useful, |
25 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
26 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
27 | * GNU General Public License for more details. | |
26ac0430 | 28 | * |
88bfe092 | 29 | * You should have received a copy of the GNU General Public License |
30 | * along with this program; if not, write to the Free Software | |
31 | * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA. | |
32 | * | |
33 | */ | |
34 | ||
dc47f531 | 35 | /** |
88bfe092 | 36 | * CPU Profiling implementation. |
26ac0430 | 37 | * |
dc47f531 | 38 | * \par |
88bfe092 | 39 | * This library implements the Probes needed to gather stats. |
40 | * See src/ProfStats.c which implements historical recording and | |
41 | * presentation in CacheMgr.cgi. | |
26ac0430 | 42 | * |
dc47f531 | 43 | * \par |
88bfe092 | 44 | * For timing we prefer on-CPU ops that retrieve cpu ticks counter. |
45 | * For Intel, this is "rdtsc", which is 64-bit counter that virtually | |
46 | * never wraps. For alpha, this is "rpcc" which is 32-bit counter and | |
47 | * wraps every few seconds. Currently, no handling of wrapping counters | |
48 | * is implemented. Other CPU's are also not covered. Potentially all | |
49 | * modern CPU's has similar counters. | |
26ac0430 | 50 | * |
88bfe092 | 51 | * Usage. |
52 | * Insert macro PROF_state(probename) in strategic places in code. | |
53 | * PROF_start(probename); | |
54 | * ... section of code measured ... | |
55 | * PROF_stop(probename); | |
26ac0430 | 56 | * |
25f98340 | 57 | * probename must be added to the xprof_type.h enum list |
88bfe092 | 58 | * with prepended "XPROF_" string. |
59 | * | |
dc47f531 AJ |
60 | * \section description Description. |
61 | * \par PROF | |
62 | * gathers stats per probename into structures. It indexes these | |
88bfe092 | 63 | * structures by enum type index in an array. |
26ac0430 | 64 | * |
dc47f531 AJ |
65 | * \par PROF |
66 | * records best, best, average and worst values for delta time, | |
88bfe092 | 67 | * also, if UNACCED is defined, it measures "empty" time during which |
68 | * no probes are in measuring state. This allows to see time "unaccounted" | |
69 | * for. If OVERHEAD is defined, additional calculations are made at every | |
70 | * probe to measure approximate overhead of the probe code itself. | |
26ac0430 | 71 | * |
dc47f531 | 72 | * \par |
88bfe092 | 73 | * Probe data is stored in linked-list, so the more probes you define, |
74 | * the more overhead is added to find the deepest nested probe. To reduce | |
75 | * average overhead, linked list is manipulated each time PR_start is | |
76 | * called, so that probe just started is moved 1 position up in linkedlist. | |
77 | * This way frequently used probes are moved closer to the head of list, | |
78 | * reducing average overhead. | |
79 | * Note that all overhead is on the scale of one hundred of CPU clock | |
80 | * ticks, which on the scale of submicroseconds. Yet, to optimise really | |
81 | * fast and frequent sections of code, we want to reduce this overhead | |
82 | * to absolute minimum possible. | |
26ac0430 | 83 | * |
dc47f531 | 84 | * \par |
88bfe092 | 85 | * For actual measurements, probe overhead cancels out mostly. Still, |
86 | * do not take the measured times as facts, they should be viewed in | |
87 | * relative comparison to overall CPU time and on the same platform. | |
26ac0430 | 88 | * |
dc47f531 | 89 | * \par |
88bfe092 | 90 | * Every 1 second, Event within squid is called that parses gathered |
91 | * statistics of every probe, and accumulates that into historical | |
92 | * structures for last 1,5,30 secs, 1,5,30 mins, and 1,5 and 24 hours. | |
93 | * Each second active probe stats are reset, and only historical data | |
94 | * is presented in cachemgr output. | |
26ac0430 | 95 | * |
dc47f531 AJ |
96 | * \section reading Reading stats. |
97 | * \par | |
88bfe092 | 98 | * "Worst case" may be misleading. Anything can happen at any section |
99 | * of code that could delay reaching to probe stop. For eg. system may | |
100 | * need to service interrupt routine, task switch could occur, or page | |
101 | * fault needs to be handled. In this sense, this is quite meaningless | |
102 | * metric. "Best case" shows fastest completion of probe section, and | |
103 | * is also somewhat useless, unless you know that amount of work is | |
104 | * constant. Best metric to watch is "average time" and total cumulated | |
105 | * time in given timeframe, which really show percentage of time spent | |
106 | * in given section of code, and its average completion time. This data | |
107 | * could be used to detect bottlenecks withing squid and optimise them. | |
26ac0430 | 108 | * |
dc47f531 | 109 | * \par |
88bfe092 | 110 | * TOTALS are quite off reality. Its there just to summarise cumulative |
111 | * times and percent column. Percent values over 100% shows that there | |
112 | * have been some probes nested into each other. | |
26ac0430 | 113 | * |
88bfe092 | 114 | */ |
115 | ||
f7f3304a | 116 | #include "squid.h" |
25f98340 | 117 | #include "profiler/Profiler.h" |
cf8d9cd4 | 118 | |
32d002cb | 119 | #if USE_XPROF_STATS |
88bfe092 | 120 | |
121 | ||
dc47f531 AJ |
122 | #if HAVE_ASSERT_H |
123 | #include <assert.h> | |
124 | #endif | |
88bfe092 | 125 | #if HAVE_GNUMALLLOC_H |
126 | #include <gnumalloc.h> | |
482aa790 | 127 | #elif HAVE_MALLOC_H |
88bfe092 | 128 | #include <malloc.h> |
129 | #endif | |
130 | #if HAVE_UNISTD_H | |
131 | #include <unistd.h> | |
132 | #endif | |
133 | ||
134 | /* Exported Data */ | |
135 | TimersArray *xprof_Timers = NULL; | |
88bfe092 | 136 | |
137 | /* Private stuff */ | |
138 | ||
4fd8d08f | 139 | |
140 | /* new stuff */ | |
141 | #define MAXSTACKDEPTH 512 | |
142 | ||
143 | struct _callstack_entry { | |
26ac0430 AJ |
144 | int timer; /* index into timers array */ |
145 | const char *name; | |
146 | hrtime_t start, stop, accum; | |
4fd8d08f | 147 | }; |
148 | ||
149 | struct _callstack_entry cstack[MAXSTACKDEPTH]; | |
150 | int cstack_head = 0; | |
151 | ||
0ef0f1de | 152 | #if defined(_MSC_VER) /* Microsoft C Compiler ONLY */ |
153 | static __inline void | |
154 | #else | |
88bfe092 | 155 | static inline void |
0ef0f1de | 156 | #endif |
88bfe092 | 157 | xprof_update(xprof_stats_data * head) |
158 | { | |
88bfe092 | 159 | if (head->delta < head->best) |
26ac0430 | 160 | head->best = head->delta; |
88bfe092 | 161 | if (head->worst < head->delta) |
26ac0430 | 162 | head->worst = head->delta; |
88bfe092 | 163 | head->summ += head->delta; |
164 | head->count++; | |
165 | } | |
166 | ||
167 | static xprof_stats_data *xp_UNACCOUNTED; | |
168 | static int xprof_inited = 0; | |
169 | ||
170 | static void | |
171 | xprof_InitLib(void) | |
172 | { | |
173 | if (xprof_inited) | |
26ac0430 | 174 | return; |
88bfe092 | 175 | |
25f98340 | 176 | xprof_Timers = static_cast<TimersArray *>(calloc(XPROF_LAST + 2, sizeof(xprof_stats_node))); |
88bfe092 | 177 | |
178 | xprof_Timers[XPROF_PROF_UNACCOUNTED]->name = "PROF_UNACCOUNTED"; | |
179 | xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu.start = get_tick(); | |
180 | xp_UNACCOUNTED = &xprof_Timers[XPROF_PROF_UNACCOUNTED]->accu; | |
4fd8d08f | 181 | cstack_head = 0; |
88bfe092 | 182 | xprof_inited = 1; |
183 | } | |
184 | ||
185 | void | |
186 | xprof_start(xprof_type type, const char *timer) | |
187 | { | |
4fd8d08f | 188 | hrtime_t tt = get_tick(); |
88bfe092 | 189 | if (!xprof_inited) |
26ac0430 | 190 | xprof_InitLib(); |
88bfe092 | 191 | |
4fd8d08f | 192 | /* If nested, stop current stack frame */ |
193 | if (cstack_head > 0) { | |
26ac0430 AJ |
194 | cstack[cstack_head - 1].accum += get_tick() - cstack[cstack_head - 1].start; |
195 | cstack[cstack_head - 1].start = -1; | |
4fd8d08f | 196 | } |
197 | ||
198 | /* Are we at the first level? If so; stop the unaccounted timer */ | |
199 | if (cstack_head == 0) { | |
26ac0430 AJ |
200 | assert(xp_UNACCOUNTED->start != -1); |
201 | xp_UNACCOUNTED->delta = tt - xp_UNACCOUNTED->start; | |
202 | xp_UNACCOUNTED->start = -1; | |
203 | xprof_update(xp_UNACCOUNTED); | |
4fd8d08f | 204 | } |
205 | ||
206 | /* Allocate new stack frame */ | |
207 | cstack[cstack_head].start = tt; | |
208 | cstack[cstack_head].stop = -1; | |
209 | cstack[cstack_head].accum = 0; | |
210 | cstack[cstack_head].timer = type; | |
211 | cstack[cstack_head].name = timer; | |
212 | cstack_head++; | |
213 | assert(cstack_head < MAXSTACKDEPTH); | |
88bfe092 | 214 | |
88bfe092 | 215 | } |
216 | ||
217 | void | |
218 | xprof_stop(xprof_type type, const char *timer) | |
219 | { | |
4fd8d08f | 220 | hrtime_t tt = get_tick(); |
221 | assert(cstack_head > 0); | |
222 | cstack_head--; | |
223 | assert(cstack[cstack_head].timer == type); | |
224 | ||
225 | /* Record timer details */ | |
226 | cstack[cstack_head].accum += tt - cstack[cstack_head].start; | |
227 | xprof_Timers[type]->accu.delta = cstack[cstack_head].accum; | |
228 | xprof_Timers[type]->name = timer; | |
229 | ||
230 | /* Update */ | |
88bfe092 | 231 | xprof_update(&xprof_Timers[type]->accu); |
4fd8d08f | 232 | |
233 | /* Restart previous timer if we're not at the top level */ | |
234 | if (cstack_head > 0) { | |
26ac0430 AJ |
235 | cstack[cstack_head - 1].start = tt; |
236 | cstack[cstack_head - 1].stop = 0; | |
237 | return; | |
4fd8d08f | 238 | } |
239 | /* Get here? We're at the top level; unaccounted */ | |
240 | xp_UNACCOUNTED->start = tt; | |
88bfe092 | 241 | } |
242 | ||
243 | #endif /* USE_XPROF_STATS */ |